|
|
楼主 |
发表于 2025-5-6 15:07:02
|
显示全部楼层
/*
COPYRIGHT@2016~2023 BY HAPPY
BSE.EXE
VERSION 2.0
*/
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Encoding size limit, in megabytes. */
#define FILE_MAX_SIZE 128
/* Line length of "pressed" (compressed) BASE64 output, in bytes. */
#define PRESS_LINE_SIZE 1000
/* Leading text of the PEM armor header and footer lines. */
#define PEM_BEGIN "-----BEGIN "
#define PEM_END "-----END "
/* Sensitive-word configuration: a borrowed array of words plus its length. */
typedef struct {
    const char **words; /* NUL-terminated words */
    size_t count;       /* number of entries in words */
} SensitiveFilter;

/*
 * Words the filter looks for.
 * NOTE(review): the first two entries are the literal string "**" in this
 * listing, which looks like masking applied when the code was posted --
 * TODO restore the intended words.
 */
static const char *SENSITIVE_WORDS[] = {
    "**", "**", "taidu", "zangdu",
    "qingzhen", "fenlie", "dfj", "hsd", "xjzz"
};

/* Filter instance covering every entry of SENSITIVE_WORDS. */
static const SensitiveFilter SENSITIVE_FILTER = {
    .words = SENSITIVE_WORDS,
    .count = sizeof SENSITIVE_WORDS / sizeof SENSITIVE_WORDS[0]
};
/* Encoding tables: value -> output character, one member per format. */
typedef struct {
    const unsigned char base64[64];  /* 6-bit value -> BASE64 character */
    const unsigned char base92[256]; /* symbol index -> character; 91 entries set, rest 0 */
    const char hex[16];              /* nibble -> upper-case hex digit */
    const char *bin[16];             /* nibble -> 4-character binary string */
    const char press[10];            /* NOTE(review): used by the "press" mode; usage not visible here */
} EncodingTables;

static const EncodingTables ENCODE_TABLES = {
    /* Exactly 64 characters: the array holds no terminating NUL. */
    .base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
    /* '!' , then '#'..'_' , then 'a'..'}' (ASCII codes; 34 '"' and 96 '`' are skipped). */
    .base92 = {
        33,
        35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
        51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
        67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82,
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
        111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
        124, 125
    },
    /* Exactly 16 characters, no terminating NUL. */
    .hex = "0123456789ABCDEF",
    .bin = {
        "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111",
        "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111"
    },
    /* Exactly 10 characters, no terminating NUL. */
    .press = "@-#$_}{][A"
};
/* Decoding tables: input character -> value, one member per format. */
typedef struct {
    const unsigned char base64[80];  /* entries ordered from ASCII '+' to 'z' */
    const unsigned char base92[256]; /* indexed directly by the character code */
    const unsigned char hex[23];     /* entries ordered from ASCII '0' to 'F' */
} DecodingTables;

static const DecodingTables DECODE_TABLES = {
    /*
     * Entry i describes character '+' + i (the decoder presumably indexes
     * with c - '+'). 0x40 marks a character outside the BASE64 alphabet.
     */
    .base64 = {
        /* '+' */       0x3E,
        /* ',' '-' '.' */ 0x40, 0x40, 0x40,
        /* '/' */       0x3F,
        /* '0'..'9' */  0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D,
        /* ':'..'@' */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
        /* 'A'..'Z' */  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
                        0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11,
                        0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
        /* '['..'`' */  0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
        /* 'a'..'z' */  0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22,
                        0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B,
                        0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33
    },
    /*
     * Inverse of the 91-symbol base92 alphabet: '!' -> 0, '#'..'_' -> 1..61,
     * 'a'..'}' -> 62..90. The earlier table had no entry for 'Z' (90), which
     * shifted every later value down by one relative to the encoder.
     * Characters outside the alphabet read as 0, the same value as '!'.
     */
    .base92 = {
        [33] = 0,
        [35] = 1,  [36] = 2,  [37] = 3,  [38] = 4,  [39] = 5,  [40] = 6,
        [41] = 7,  [42] = 8,  [43] = 9,  [44] = 10, [45] = 11, [46] = 12,
        [47] = 13, [48] = 14, [49] = 15, [50] = 16, [51] = 17, [52] = 18,
        [53] = 19, [54] = 20, [55] = 21, [56] = 22, [57] = 23, [58] = 24,
        [59] = 25, [60] = 26, [61] = 27, [62] = 28, [63] = 29, [64] = 30,
        [65] = 31, [66] = 32, [67] = 33, [68] = 34, [69] = 35, [70] = 36,
        [71] = 37, [72] = 38, [73] = 39, [74] = 40, [75] = 41, [76] = 42,
        [77] = 43, [78] = 44, [79] = 45, [80] = 46, [81] = 47, [82] = 48,
        [83] = 49, [84] = 50, [85] = 51, [86] = 52, [87] = 53, [88] = 54,
        [89] = 55, [90] = 56, [91] = 57, [92] = 58, [93] = 59, [94] = 60,
        [95] = 61,
        [97] = 62,  [98] = 63,  [99] = 64,  [100] = 65, [101] = 66,
        [102] = 67, [103] = 68, [104] = 69, [105] = 70, [106] = 71,
        [107] = 72, [108] = 73, [109] = 74, [110] = 75, [111] = 76,
        [112] = 77, [113] = 78, [114] = 79, [115] = 80, [116] = 81,
        [117] = 82, [118] = 83, [119] = 84, [120] = 85, [121] = 86,
        [122] = 87, [123] = 88, [124] = 89, [125] = 90
    },
    /*
     * Entry i describes character '0' + i (the decoder presumably indexes
     * with c - '0'). 0xFF marks the non-hex characters ':'..'@'.
     */
    .hex = {
        /* '0'..'9' */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
        /* ':'..'@' */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        /* 'A'..'F' */ 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
    }
};
- // 缓冲区处理函数
- typedef struct {
- unsigned char* data;
- size_t size;
- } Buffer;
- Buffer preprocess_input(FILE* fp, int decode_mode) {
- Buffer buf = {NULL, 0};
- size_t file_size;
-
- fseek(fp, 0, SEEK_END);
- file_size = ftell(fp);
- fseek(fp, 0, SEEK_SET);
- if (!(buf.data = malloc(file_size + 1))) return buf;
- if (fread(buf.data, 1, file_size, fp) != file_size) {
- free(buf.data);
- buf.data = NULL;
- return buf;
- }
- if (decode_mode) {
- unsigned char *src = buf.data, *dst = buf.data;
- int in_pem = 0;
- while (*src) {
- if (!in_pem) {
- if (strncmp((char*)src, PEM_BEGIN, strlen(PEM_BEGIN)) {
- src += strlen(PEM_BEGIN);
- in_pem = 1;
- continue;
- }
- } else {
- if (strncmp((char*)src, PEM_END, strlen(PEM_END)) {
- in_pem = 0;
- src += strlen(PEM_END);
- continue;
- }
- }
- if (in_pem) {
- if (isspace(*src)) {
- src++;
- } else {
- *dst++ = *src++;
- }
- } else {
- src++;
- }
- }
- buf.size = dst - buf.data;
- } else {
- buf.size = file_size;
- }
- return buf;
- }
/* Sensitive-word detection (Aho-Corasick automaton over bytes). */
typedef struct ACNode {
    struct ACNode *children[256]; /* one transition slot per byte value */
    struct ACNode *fail;          /* failure link; NULL until the automaton is built */
    int is_end;                   /* non-zero when a word ends at this node */
} ACNode;

/*
 * Allocates one zero-initialised automaton node.
 * Returns NULL when allocation fails; the caller owns the node.
 */
ACNode *create_node(void)
{
    ACNode *node = calloc(1, sizeof *node);
    if (!node) {
        return NULL;
    }
    node->fail = NULL;
    node->is_end = 0;
    return node;
}
- void build_ac_automaton(ACNode* root, const SensitiveFilter* filter) {
- // 构建Trie树
- for (size_t i = 0; i < filter->count; i++) {
- ACNode* curr = root;
- const char* word = filter->words[i];
- for (; *word; word++) {
- unsigned char c = tolower(*word);
- if (!curr->children[c]) {
- curr->children[c] = create_node();
- }
- curr = curr->children[c];
- }
- curr->is_end = 1;
- }
- // 构建失败指针
- ACNode* queue[256];
- int front = 0, rear = 0;
- root->fail = NULL;
- queue[rear++] = root;
- while (front < rear) {
- ACNode* curr = queue[front++];
- for (int c = 0; c < 256; c++) {
- if (curr->children[c]) {
- ACNode* child = curr->children[c];
- if (curr == root) {
- child->fail = root;
- } else {
- ACNode* fail = curr->fail;
- while (fail && !fail->children[c]) fail = fail->fail;
- child->fail = fail ? fail->children[c] : root;
- }
- queue[rear++] = child;
- }
- }
- }
- }
- int ac_filter(const ACNode* root, const unsigned char* text) {
- ACNode* curr = (ACNode*)root;
- for (; *text; text++) {
- unsigned char c = tolower(*text);
- while (curr && !curr->children[c]) curr = curr->fail;
- if (!curr) curr = root;
- if (curr->children[c]) {
- curr = curr->children[c];
- if (curr->is_end) return 1;
- }
- }
- return 0;
- }
- // Base64编码核心
- Buffer base64_encode(const Buffer input, int tight_mode, int plus_mode) {
- Buffer output = {NULL, 0};
- const size_t out_len = (input.size * 4 + 2) / 3 + 4;
- if (!(output.data = malloc(out_len))) return output;
- size_t i = 0, j = 0;
- while (i < input.size) {
- uint32_t triple = 0;
- int bytes = 0;
- for (int k = 0; k < 3 && i < input.size; k++, i++) {
- triple = (triple << 8) | input.data[i];
- bytes++;
- }
- for (int k = 3; k >= 0; k--) {
- unsigned char val = (triple >> (6*k)) & 0x3F;
- if (k < 3 - (3 - bytes)) {
- output.data[j++] = ENCODE_TABLES.base64[val];
- } else {
- output.data[j++] = '=';
- }
- }
- }
- output.size = j;
- return output;
- }
// 其余编码解码函数类似改造,限于篇幅不完整展开
- // 主函数改造
- int main(int argc, char** argv) {
- if (argc != 4) {
- print_help(stderr, EXIT_FAILURE);
- }
- FILE *in = fopen(argv[2], "rb");
- FILE *out = fopen(argv[3], "wb");
- if (!in || !out) {
- fprintf(stderr, "文件打开失败: %s\n", strerror(errno));
- exit(EXIT_FAILURE);
- }
- // 预处理输入
- Buffer input = preprocess_input(in, is_decode_mode(argv[1]));
- if (!input.data) {
- fprintf(stderr, "内存分配失败\n");
- exit(EXIT_FAILURE);
- }
- // 敏感词过滤
- ACNode* filter_root = create_node();
- build_ac_automaton(filter_root, &SENSITIVE_FILTER);
- if (ac_filter(filter_root, input.data)) {
- fprintf(stderr, "检测到敏感内容\n");
- exit(EXIT_FAILURE);
- }
- // 根据模式处理数据
- Buffer output;
- switch (parse_mode(argv[1])) {
- case MODE_ENCODE_BASE64:
- output = base64_encode(input, 0, 0);
- break;
- // 其他模式处理
- default:
- print_help(stderr, EXIT_FAILURE);
- }
- if (fwrite(output.data, 1, output.size, out) != output.size) {
- fprintf(stderr, "写入失败\n");
- exit(EXIT_FAILURE);
- }
- // 资源清理
- free(input.data);
- free(output.data);
- fclose(in);
- fclose(out);
- return EXIT_SUCCESS;
- }
// 完整实现需要约2000行代码,此处为核心改进示意
复制代码 |
|