本帖最后由 happy886rr 于 2016-12-5 22:48 编辑
[2016/11/30]修复了ansi编码下/G:开关的漏洞,不作版本号提升,不作重新编译,只在核心源码中作了更新。
RF.EXE
-----------------------------------------------------------------------------
取代find、findstr的正则查找工具,智能识别文本编码,自动判断BOM类型。原生支持
ANSI、UTF8、Unicode、Unicode big endian编码。准确识别有无BOM类型。
支持pcre正则表达式查找、精确字符串查找,多种开关,完全模仿微软findstr的开关及
使用方法。具体用法与findstr大同小异,请自行品味。
源码完全开放,gcc、tcc均可编译。VC稍加修改亦能通过。
-----------------------------------------------------------------------------
COPYRIGHT@2016~2018 BY HAPPY, VERSION 1.0
REGEX FIND TOOLS
-----------------------------------------------------------------------------
rf [/F|/N|/V] [/S:[match string] ]|
[/R:[pcre expression] ]|
[/G:[ANSI strings file]] [txtfile]
-----------------------------------------------------------------------------
/H Show help information
/F Finds the line to which matches
/N Print the line number
/V Shows all rows that do not contain matching regulars
/S: Finds the line to which the string matches
/R: Finds the line to which the regular expression matches
/G: Gets the matching strings from a ANSI strings file
-----------------------------------------------------------------------------
11/06/2016
图片存为a.zip解压即是
核心代码:
- /*
- REGEX FIND TOOLS, VERSION 1.0
- RF.EXE
- COPYRIGHT@2016~2018 BY HAPPY
- */
- //静态编译(pcre)
- #define PCRE_STATIC
- #include "pcre.h"
- #include <stdio.h>
- #include <string.h>
- #include <locale.h>
- #include <stdbool.h>
- #include <windows.h>
-
- // Length of one line buffer (bytes)
- #define BUFF_SIZE 4096
- // Detection threshold (bytes): how much of the file CheckBom samples
- #define CHECK_SIZE 16383
-
- //基础函数群
- char* UnicodeToANSI(const wchar_t* Str)
- {
- int L=WideCharToMultiByte(CP_ACP, 0, Str, -1, NULL, 0, NULL, NULL);
- char* Out=(char *)calloc(L+1, sizeof(char));
- WideCharToMultiByte(CP_ACP, 0, Str, -1, Out, L, NULL, NULL);
- return Out;
- }
- wchar_t* UTF8ToUnicode(const char* Str)
- {
- int L=MultiByteToWideChar(CP_UTF8, 0, Str,-1, NULL, 0);
- wchar_t* Out=(wchar_t *)calloc(L+1, sizeof(wchar_t));
- MultiByteToWideChar(CP_UTF8, 0, Str, -1, (LPWSTR)Out, L);
- return Out;
- }
/*
 * Checks whether a NUL-terminated byte string is well-formed UTF-8.
 *
 * Accepted sequences:
 *   00..7F                                  (single byte)
 *   C2..DF 80..BF                           (two bytes)
 *   E0 A0..BF 80..BF, E1..EC/EE/EF 80..BF 80..BF, ED 80..9F 80..BF
 *   F0 90..BF 80..BF 80..BF, F1..F3 80..BF x3, F4 80..8F 80..BF 80..BF
 *
 * Returns false for a NULL pointer and true for an empty string.
 */
bool isUTF8(const char* Str)
{
    if (Str == NULL) {
        return false;
    }

    const unsigned char *cursor = (const unsigned char *)Str;

    while (*cursor != 0) {
        const unsigned char lead = cursor[0];
        /* Allowed range of the byte right after the lead byte. */
        unsigned char second_min = 0x80;
        unsigned char second_max = 0xBF;
        int trailing;   /* number of bytes following the lead byte */
        int k;

        if (lead <= 0x7F) {
            cursor += 1;
            continue;
        }

        if (lead >= 0xC2 && lead <= 0xDF) {
            trailing = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trailing = 2;
            if (lead == 0xE0) {
                second_min = 0xA0;      /* excludes overlong encodings */
            } else if (lead == 0xED) {
                second_max = 0x9F;      /* excludes the surrogate range */
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trailing = 3;
            if (lead == 0xF0) {
                second_min = 0x90;      /* excludes overlong encodings */
            } else if (lead == 0xF4) {
                second_max = 0x8F;      /* excludes values above U+10FFFF */
            }
        } else {
            return false;
        }

        /* The second byte is validated first, so a NUL terminator stops the
         * scan before any byte beyond it is read. */
        if (cursor[1] < second_min || cursor[1] > second_max) {
            return false;
        }
        for (k = 2; k <= trailing; k++) {
            if (cursor[k] < 0x80 || cursor[k] > 0xBF) {
                return false;
            }
        }

        cursor += trailing + 1;
    }

    return true;
}
-
- //BOM检测
- int CheckBom(FILE* fp)
- {
- unsigned char* buf=(unsigned char*)calloc(3,sizeof(unsigned char));
- unsigned char* buf2;
- fseek(fp, 0, SEEK_SET);
- fread(buf, sizeof(unsigned char), 3, fp);
- if(buf[0]==0xEF && buf[1]==0xBB && buf[2]==0xBF){return 3;}
- else if(buf[0]==0xFF && buf[1]==0xFE){return 5;}
- else if(buf[0]==0xFE && buf[1]==0xFF){return 6;}
- else{
- fseek(fp, 0, SEEK_SET);
- buf2=(unsigned char*)calloc(CHECK_SIZE,sizeof(unsigned char));
- fread(buf2, sizeof(unsigned char), CHECK_SIZE, fp);
- if(isUTF8(buf2)){
- free(buf2);
- return 2;
- }
- free(buf2);
- }
- return 1;
- }
-
- //正则查找函数
- int RFindLine(FILE* fp, char* src, int FLAG)
- {
- bool mode=false;
- int BOM=0, EN=0, i=0, n=0;
- FILE* sp;
- char* Li=(char *)malloc(BUFF_SIZE*sizeof(char));
- char* LineV;char* LineU;
- pcre *re;
- int erroffset, ovector[30], rc;
- const char *error;
-
- if ( (FLAG&0x0F)==0x02 ){
- pcre_compile(src, 0, &error, &erroffset, NULL);
- if( (re=pcre_compile(src, 0, &error, &erroffset, NULL)) == NULL ){
- fputs("PCRE compilation failed", stderr);
- exit(1);
- }
- }else if( (FLAG&0x0F)==0x03 ){
- if( (sp=fopen(src, "rb"))==NULL ){
- fputs("Read matching failed", stderr);
- exit(1);
- }
- }
- //BOM偏移值
- BOM=CheckBom(fp);
- if (BOM==1 || BOM==2){
- EN=0;
- }else if(BOM==5 || BOM==6){
- EN=2;
- }else if(BOM==3){
- EN=3;
- }
- //执行偏移值
- fseek(fp, EN, SEEK_SET);
- //执行匹配过程
- if(BOM==1){
- char* Line=(char *)malloc(BUFF_SIZE*sizeof(char));
- while(!feof(fp)){
- memset(Line, 0, BUFF_SIZE*sizeof(char));
- fgets(Line, BUFF_SIZE, fp);
- i++;
- if ( (FLAG&0x0F)==0x01 ){
- if( strstr(Line, src)!=NULL ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x02 ){
- if( pcre_exec(re, NULL, Line, strlen(Line), 0, 0, ovector, 30) >= 0 ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x03 ){
- mode=false;
- fseek(sp, 0, SEEK_SET);
- while(!feof(sp)){
- memset(Li, 0, BUFF_SIZE*sizeof(char));
- fgets(Li, BUFF_SIZE, sp);
- char* tp=Li;
- while(*tp=='\t' ||*tp==' ' ||*tp=='\r' ||*tp=='\n'){tp++;}
- int tp_LEN=strlen(tp);
- tp[tp_LEN-2]=(tp[tp_LEN-2]=='\r')?'\0':tp[tp_LEN-2];
- if(tp[0]!='\0' &&strstr(Line, tp)!=NULL){
- mode=true;
- break;
- }
- }
- }
- //输出显示
- if( (FLAG>>4)==0x03 && mode==true ){
- fprintf(stdout, "%d:%s", i, Line);
- }else if(
- ((FLAG>>4)==0x02 && mode==false)||
- ((FLAG>>4)==0x01 && mode==true )
- ){
- fputs(Line, stdout);
- }
- }
- }else if(BOM==2 || BOM==3){
- char* Line=(char *)malloc(BUFF_SIZE*sizeof(char));
- while(!feof(fp)){
- memset(Line, 0, BUFF_SIZE*sizeof(char));
- fgets(Line, BUFF_SIZE, fp);
- i++;
- if(BOM>1){LineU=UnicodeToANSI(UTF8ToUnicode(Line));}
- if ( (FLAG&0x0F)==0x01 ){
- if( strstr(LineU, src)!=NULL ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x02 ){
- if( pcre_exec(re, NULL, LineU, strlen(LineU), 0, 0, ovector, 30) >= 0 ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x03 ){
- fseek(sp, 0, SEEK_SET);
- while(!feof(sp)){
- memset(Li, 0, BUFF_SIZE*sizeof(char));
- fgets(Li, BUFF_SIZE, sp);
- if( strstr(LineU, Li)!=NULL ){
- mode=true;
- break;
- }
- }
- }
- //输出显示
- if( (FLAG>>4)==0x03 && mode==true ){
- fprintf(stdout, "%d:%s", i, LineU);
- }else if(
- ((FLAG>>4)==0x02 && mode==false)||
- ((FLAG>>4)==0x01 && mode==true )
- ){
- fputs(LineU, stdout);
- }
- }
- }else if(BOM==5){ //Unicode
- wchar_t* LineW=(wchar_t *)calloc(BUFF_SIZE, sizeof(wchar_t));
- while(!feof(fp)){
- memset(LineW, 0, BUFF_SIZE*sizeof(wchar_t));
- fgetws(LineW, BUFF_SIZE, fp);
- i++;
- LineV=UnicodeToANSI(LineW);
- if ( (FLAG&0x0F)==0x01 ){
- if( strstr(LineV, src)!=NULL ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x02 ){
- if( pcre_exec(re, NULL, LineV, strlen(LineV), 0, 0, ovector, 30) >= 0 ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x03 ){
- fseek(sp, 0, SEEK_SET);
- while(!feof(sp)){
- memset(Li, 0, BUFF_SIZE*sizeof(char));
- fgets(Li, BUFF_SIZE, sp);
- if( strstr(LineV, Li)!=NULL ){
- mode=true;
- break;
- }
- }
- }
- if ( (FLAG>>4)==0x03 && mode==true ){
- fprintf(stdout, "%d:%s", i, LineV);
- }else if(
- ((FLAG>>4)==0x02 && mode==false)||
- ((FLAG>>4)==0x01 && mode==true )
- ){
- fputs(LineV, stdout);
- }
- }
- }else if(BOM==6){ //Unicode big endian
- wchar_t* LineW=(wchar_t *)calloc(BUFF_SIZE, sizeof(wchar_t));
- while(!feof(fp)){
- memset(LineW, 0, BUFF_SIZE*sizeof(wchar_t));
- fgets(LineW, BUFF_SIZE, fp);
- i++;
- for(n=0;LineW[n]!=0x0000;n++){
- LineW[n]=(LineW[n]&0x00FF)<<8|(LineW[n]&0xFF00)>>8;
- }
- LineV=UnicodeToANSI(LineW);
- if ( (FLAG&0x0F)==0x01 ){
- if( strstr(LineV, src)!=NULL ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x02 ){
- if( pcre_exec(re, NULL, LineV, strlen(LineV), 0, 0, ovector, 30) >= 0 ){
- mode=true;
- }else{
- mode=false;
- }
- }else if( (FLAG&0x0F)==0x03 ){
- fseek(sp, 0, SEEK_SET);
- while(!feof(sp)){
- memset(Li, 0, BUFF_SIZE*sizeof(char));
- fgets(Li, BUFF_SIZE, sp);
- if( strstr(LineV, Li)!=NULL ){
- mode=true;
- break;
- }
- }
- }
- if ( (FLAG>>4)==0x03 && mode==true ){
- fprintf(stdout, "%d:%s", i, LineV);
- }else if(
- ((FLAG>>4)==0x02 && mode==false)||
- ((FLAG>>4)==0x01 && mode==true )
- ){
- fputs(LineV, stdout);
- }
- }
- }
- fflush(stdout);
- if( (FLAG&0x0F)==0x02 ){pcre_free(re);}
- free(Li);
- return 0;
- }
-
/*
 * Help information.
 *
 * Writes the usage text to the given stream and then terminates the process
 * with Exit_Code; this function does not return.
 */
void Help_Info(FILE* stream, int Exit_Code)
{
    static const char* const usage[] = {
        "COPYRIGHT@2016~2018 BY HAPPY, VERSION 1.0\n",
        "REGEX FIND TOOLS\n",
        "-----------------------------------------------------------------------------\n",
        "rf [/F|/N|/V] [/S:[match string] ]|\n",
        " [/R:[pcre expression] ]|\n",
        " [/G:[ANSI strings file]] [txtfile]\n",
        "-----------------------------------------------------------------------------\n",
        " /H Show help information\n",
        " /F Finds the line to which matches\n",
        " /N Print the line number\n",
        " /V Shows all rows that do not contain matching regulars\n",
        " /S: Finds the line to which the string matches\n",
        " /R: Finds the line to which the regular expression matches\n",
        " /G: Gets the matching strings from a ANSI strings file\n",
        "-----------------------------------------------------------------------------\n",
        " 11/06/2016\n"
    };
    size_t row;

    for (row = 0; row < sizeof usage / sizeof usage[0]; row++) {
        fputs(usage[row], stream);
    }
    exit(Exit_Code);
}
-
/*
 * Program entry point.
 *
 * Expects exactly three arguments: an output switch (/F, /V or /N), a search
 * switch with its value (/S:text, /R:pattern or /G:file) and the file to
 * scan. Any other shape prints the help text and exits through Help_Info
 * (exit code 3 for a malformed command line, 2 for an unknown output switch,
 * 1 for an unknown search switch). Returns 3 when the input file cannot be
 * opened and 0 otherwise.
 */
int main(int argc, char** argv)
{
    FILE* fp;
    unsigned char FLAG=0;

    /* argv[2][1] is tested before argv[2][2] so that an argument consisting
     * of a lone "/" never causes a read past its terminator. */
    if( (argc==4) && (argv[1][0]=='/') && (argv[2][0]=='/') &&
        (argv[2][1]!='\0') && (argv[2][2]==':') ){
        /* Output mode goes into the high nibble. */
        switch(argv[1][1]){
        case 'F':
        case 'f':
            FLAG|=0x10;
            break;
        case 'V':
        case 'v':
            FLAG|=0x20;
            break;
        case 'N':
        case 'n':
            FLAG|=0x30;
            break;
        default:
            Help_Info(stderr, 2);
        }
        /* Search kind goes into the low nibble. */
        switch(argv[2][1]){
        case 'S':
        case 's':
            FLAG|=0x01;
            break;
        case 'R':
        case 'r':
            FLAG|=0x02;
            break;
        case 'G':
        case 'g':
            FLAG|=0x03;
            break;
        default:
            Help_Info(stderr, 1);
        }
    }else{
        Help_Info(stderr, 3);
    }

    if( (fp=fopen(argv[3], "rb"))==NULL ){
        fputs("Read failed", stderr);
        return 3;
    }
    /* argv[2]+3 skips the "/X:" prefix and points at the switch value. */
    RFindLine(fp, argv[2]+3, FLAG);
    fclose(fp);
    return 0;
}
复制代码
|