标题: 这也太快了吧!!!c处理csv文件(求和,平均值,最大值,最小值)3W多行秒处理! [打印本页]
作者: Gin_Q 时间: 2020-2-20 20:35 标题: 这也太快了吧!!!c处理csv文件(求和,平均值,最大值,最小值)3W多行秒处理!
本帖最后由 Gin_Q 于 2020-2-27 10:09 编辑
命令行用法:D:\GIN\c\test>csv分析.exe source.csv result.txt 1
- //编译器 Dev-C++ 5.11
- //Rev 02
- //Author By Gin
- //增加打印格式
- //.cpp
-
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
-
-
-
/*
 * Command-line entry point.
 *
 * Usage: <program> source.csv result.txt [format]
 *
 * Reads source.csv, treats the first line as column titles, and for every
 * column except the first accumulates sum, minimum, maximum and average over
 * the remaining lines. The table is written to result.txt through
 * form_printf(); `format` (default 0) is passed through as its `off` argument.
 *
 * Empty or non-numeric fields are skipped, and each column's average is taken
 * over the number of values actually read for that column.
 *
 * Returns 0 on success and EXIT_FAILURE on a usage or I/O error.
 */
int main(int argc,char *argv[])
{
    int delims(char *ser,char *res,int res_size);
    FILE *file_p(char *path,const char *mode);
    void form_printf(FILE *fp,char *item,double *sum,double *min,double *max_1,double *avg,int width,int T_max,int off);
    int check_width(FILE *fp);

    if (argc<3)
    {
        fprintf(stderr,"usage: %s source.csv result.txt [format 0|1|2]\n",
                (argc>0 && argv[0]!=NULL)?argv[0]:"csv");
        return EXIT_FAILURE;
    }

    FILE *source_txt=file_p(argv[1],"r");   // input CSV
    FILE *result_txt=file_p(argv[2],"w");   // report

    int off=(argc>3)?atoi(argv[3]):0;       // output layout selector
    int width=check_width(source_txt);      // number of columns in the header line
    enum { Max=10240, T_max=50 };           // line buffer size, per-field buffer size

    // Variable-length arrays cannot carry initializers in C, so clear them explicitly.
    char temp[width][T_max];                // fields of the current data line
    char item[width][T_max];                // column titles
    char line_size[Max];                    // one raw line of the file
    double sum[width],min[width],max_1[width],avg[width];
    int samples[width];                     // values accumulated per column
    memset(temp,0,sizeof temp);
    memset(item,0,sizeof item);
    for (int i=0;i<width;i++)
    {
        sum[i]=min[i]=max_1[i]=avg[i]=0.0;
        samples[i]=0;
    }

    rewind(source_txt);
    if (fgets(line_size,Max,source_txt)==NULL)
    {
        fprintf(stderr,"%s: no header line\n",argv[1]);
        fclose(source_txt);
        fclose(result_txt);
        return EXIT_FAILURE;
    }
    delims(line_size,item[0],T_max);        // keep the titles separately

    while (fgets(line_size,Max,source_txt)!=NULL)
    {
        // delims() has no idea how many rows `temp` has, so cut the line off
        // after `width` fields before splitting it.
        int commas=0;
        for (char *p=line_size;*p!='\0';p++)
        {
            if (*p==',' && ++commas==width)
            {
                p[0]='\n';
                p[1]='\0';
                break;
            }
        }

        memset(temp,0,sizeof temp);         // no stale fields from the previous line
        int nu_line=delims(line_size,temp[0],T_max);
        int cols=(nu_line<width)?nu_line:width;

        for (int i=1;i<cols;i++)            // i=1: the first column is not analysed
        {
            char *end;
            double value=strtod(temp[i],&end);
            if (end==temp[i]) continue;     // empty or non-numeric field

            int c=i-1;
            sum[c]+=value;
            if (samples[c]==0)
            {
                min[c]=value;
                max_1[c]=value;
            }
            else
            {
                if (value>max_1[c]) max_1[c]=value;
                if (value<min[c]) min[c]=value;
            }
            samples[c]++;
        }
    }

    for (int i=0;i<width-1;i++)
    {
        if (samples[i]>0) avg[i]=sum[i]/samples[i];
    }

    form_printf(result_txt,item[0],sum,min,max_1,avg,width,T_max,off);
    fclose(source_txt);
    if (fclose(result_txt)!=0)              // fclose flushes: a failure means the report is incomplete
    {
        perror(argv[2]);
        return EXIT_FAILURE;
    }
    return 0;
}
/*
 * Count the comma-separated columns on the first line of a stream.
 *
 * Rewinds fp, reads characters up to the first '\n' or the end of the file,
 * and returns the number of commas seen plus one. The stream is left
 * positioned after the characters that were read.
 */
int check_width(FILE *fp)
{
    rewind(fp);
    int w=1;
    int c;  // int rather than char: fgetc() reports EOF as an out-of-range int
    while ((c=fgetc(fp))!=EOF && c!='\n')
    {
        if (c==',') w++;
    }
    return w;
}
/*
 * Count the data lines of a stream, i.e. all lines except the first one.
 *
 * Rewinds fp and scans to the end of the file. Lines are counted by their
 * '\n' terminators, so a line of any length counts once; a final line without
 * a terminator is counted as well. Returns 0 for an empty file.
 */
int check_high(FILE *fp)
{
    rewind(fp);
    int lines=0;
    int c,last='\n';
    while ((c=fgetc(fp))!=EOF)
    {
        if (c=='\n') lines++;
        last=c;
    }
    if (last!='\n') lines++;        // unterminated last line
    return (lines>0)?lines-1:0;     // drop the header line
}
/*
 * Split one CSV line into fixed-size, NUL-terminated fields.
 *
 * sou       the line to split; a trailing "\n" or "\r\n" is ignored
 * res       output area, addressed as consecutive slots of res_size bytes;
 *           field number j is written at res + j*res_size
 * res_size  size of one output slot in bytes
 *
 * Commas separate fields and spaces are dropped. A field longer than
 * res_size-1 characters is truncated so that it never spills into the next
 * slot. The caller must supply one slot per field in the line; the number of
 * slots is not known here.
 *
 * Returns the number of fields (commas + 1), or 0 when res_size < 1.
 */
int delims(char *sou,char *res,int res_size)
{
    if (res_size<1) return 0;

    // Ignore the line terminator instead of blindly chopping the last
    // character, which would damage a final line that has no newline.
    size_t len=strlen(sou);
    while (len>0 && (sou[len-1]=='\n' || sou[len-1]=='\r')) len--;

    char *field=res;    // start of the slot being filled
    int pos=0;          // write offset inside that slot
    int count=1;
    for (size_t i=0;i<len;i++)
    {
        char c=sou[i];
        if (c==',')
        {
            field[pos]='\0';
            field+=res_size;
            pos=0;
            count++;
        }
        else if (c!=' ' && pos<res_size-1)
        {
            field[pos++]=c;
        }
    }
    field[pos]='\0';    // terminate the last field
    return count;
}
/*
 * Open a file or terminate the program.
 *
 * path  file name to open
 * mode  fopen() mode string
 *
 * Returns the open stream. If path or mode is NULL, or fopen() fails, a
 * message is written to stderr and the process exits with EXIT_FAILURE, so
 * the returned pointer is never NULL.
 */
FILE *file_p(char *path,const char *mode)
{
    if (path==NULL || mode==NULL)
    {
        fprintf(stderr,"file_p: missing file name or mode\n");
        exit(EXIT_FAILURE);
    }

    FILE *fp=fopen(path,mode);
    if (fp==NULL)
    {
        perror(path);           // prints "<path>: <reason>" on stderr
        exit(EXIT_FAILURE);     // a failed open must not report success
    }
    return fp;
}
/* Write one horizontal border: '+', 31 dashes, then four times '+' and 21 dashes, '+'. */
static void form_rule(FILE *fp)
{
    static const int cell[]={31,21,21,21,21};
    for (size_t c=0;c<sizeof cell/sizeof cell[0];c++)
    {
        fputc('+',fp);
        for (int d=0;d<cell[c];d++) fputc('-',fp);
    }
    fputs("+\n",fp);
}

/*
 * Write the statistics table to fp.
 *
 * item    column titles, stored as consecutive slots of T_max bytes; slot 0
 *         (the first CSV column) is not printed
 * sum, min, max_1, avg
 *         one value per printed column; index i belongs to title slot i+1
 * width   number of title slots; width-1 rows are printed
 * T_max   size of one title slot in bytes
 * off     layout: 1 = boxed table with a border around the header and around
 *         the whole body, 2 = boxed table with a border before every row,
 *         anything else = plain left-aligned columns
 */
void form_printf(FILE *fp,char *item,double *sum,double *min,double *max_1,double *avg,int width,int T_max,int off)
{
    int boxed=(off==1 || off==2);

    if (boxed)
    {
        form_rule(fp);
        fprintf(fp,"| %-30s| %-20s| %-20s| %-20s| %-20s|\n","Item","Sum","Min","Max","Avg");
        if (off==1) form_rule(fp);      // layout 2 draws its border in front of each row instead
    }
    else
    {
        fprintf(fp,"%-30s%-20s%-20s%-20s%-20s\n","Item","Sum","Min","Max","Avg");
    }

    for (int i=0;i<width-1;i++)
    {
        const char *name=item+(i+1)*T_max;  // skip title slot 0
        if (boxed)
        {
            if (off==2) form_rule(fp);
            fprintf(fp,"| %-30s| %-20f| %-20f| %-20f| %-20f|\n",name,sum[i],min[i],max_1[i],avg[i]);
        }
        else
        {
            fprintf(fp,"%-30s%-20f%-20f%-20f%-20f\n",name,sum[i],min[i],max_1[i],avg[i]);
        }
    }

    if (boxed) form_rule(fp);
}
复制代码
作者: red2020 时间: 2020-2-20 22:51
回复 3# Gin_Q
最后一项用long long 就有精度了
作者: red2020 时间: 2020-2-26 23:37
本帖最后由 red2020 于 2020-2-26 23:53 编辑
回复 10# Gin_Q
你们速度都太慢了,我只需要 66 行代码、0.3 秒就能处理 3 万行。在 Windows 下用 gcc 编译一下,甭管你 CPU 多弱,只需 0.3 秒就能处理 3 万行。程序最后一行输出自带计时。
- #include <stdio.h>
- #include <stdlib.h>
- #include <time.h>
-
#define LINE_SIZE 1024      /* longest line, in bytes, that is read in one piece */
#define COLS_SIZE 32        /* maximum number of data columns (the first CSV column is not analysed) */

double a[COLS_SIZE][3];                 /* per data column: [0] sum, [1] minimum, [2] maximum */
static long samples[COLS_SIZE];         /* values accumulated per data column */
char iName[COLS_SIZE + 1][LINE_SIZE];   /* header names; row 0 belongs to the skipped first column */

/*
 * Usage: <program> file.csv
 *
 * Reads the header line for the column names, then accumulates sum, minimum
 * and maximum for every column after the first (at most COLS_SIZE of them),
 * and prints one line per column followed by the line count and elapsed CPU
 * time. Empty or non-numeric fields are skipped; the average of a column is
 * taken over the values actually read for it.
 *
 * Returns 0 on success, 1 on a usage or open error.
 */
int main(int argc, char** argv)
{
    clock_t start = clock();

    if (argc < 2)
    {
        fprintf(stderr, "usage: %s file.csv\n", (argc > 0 && argv[0] != NULL) ? argv[0] : "csv");
        return 1;
    }
    FILE* fp = fopen(argv[1], "rb");
    if (fp == NULL)
    {
        perror(argv[1]);
        return 1;
    }

    char line[LINE_SIZE] = {0};
    int cols = 0;   /* number of data columns found in the header */

    /* Header: collect the names, dropping whitespace and line terminators. */
    if (fgets(line, LINE_SIZE, fp) != NULL)
    {
        int colsIndex = 0, strIndex = 0;
        for (const char* p = line; *p; p ++)
        {
            switch(*p)
            {
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                    break;
                case ',':
                    colsIndex ++;
                    strIndex = 0;
                    break;
                default:
                    /* stay inside iName; names beyond the limits are cut off */
                    if(colsIndex <= COLS_SIZE && strIndex < LINE_SIZE - 1)
                        iName[colsIndex][strIndex++] = *p;
                    break;
            }
        }
        cols = (colsIndex < COLS_SIZE) ? colsIndex : COLS_SIZE;
    }

    /* Data: loop on the fgets() result so that the last line is not processed twice. */
    int countLines = 0;
    while(fgets(line, LINE_SIZE, fp) != NULL)
    {
        int col = 0;    /* restarts on every line, so short lines cannot shift later ones */
        for (char* p = line; *p && col < cols; p ++)
        {
            if(*p != ',') continue;

            char* end;
            double v = strtod(p + 1, &end);
            if(end != p + 1)    /* a number was actually parsed */
            {
                a[col][0] += v;
                if(samples[col] == 0 || v < a[col][1]) a[col][1] = v;
                if(samples[col] == 0 || v > a[col][2]) a[col][2] = v;
                samples[col] ++;
            }
            col ++;
        }
        if(col > 0) countLines ++;
    }
    fclose(fp);

    printf("%-30.30s    %16s %16s %16s %16s\n","Item","Sum","Min","Max","Avg");
    for(int i = 0; i < cols; i ++)
    {
        double avg = (samples[i] > 0) ? a[i][0] / samples[i] : 0.0;
        printf("%-30.30s -> %16.6f %16.6f %16.6f %16.6f\n", iName[i+1], a[i][0], a[i][1], a[i][2], avg);
    }

    /* clock() counts in CLOCKS_PER_SEC units and clock_t is not an int: convert before printing */
    double ms = (double)(clock() - start) * 1000.0 / CLOCKS_PER_SEC;
    printf("Count %d lines spend time :%.0f (ms)\n", countLines, ms);
    return 0;
}
复制代码
这是效果图,整齐得一塌糊涂
- Item Sum Min Max Avg
- VDDCR_GFXCurrent(A)[0](A) -> 214949.328125 2.007840 4000.015625 3.582489
- GPUTemperature(C)[0](C) -> 2227779.500000 36.745071 38.434509 37.129658
- JunctionTemperature(C)[0](C) -> 2235515.250000 36.784481 39.281551 37.258587
- MemTemperature(C)[0](C) -> 2045027.750000 34.000000 34.776291 34.083796
- VR_GFX(C)[0](C) -> 1665636.750000 27.000000 28.000000 27.760613
- VR_SOC(C)[0](C) -> 1740000.000000 29.000000 29.000000 29.000000
- VR_MEM(C)[0](C) -> 0.000000 0.000000 0.000000 0.000000
- VR_VDDCI(C)[0](C) -> 0.000000 0.000000 0.000000 0.000000
- Liquid0(C)[0](C) -> 0.000000 0.000000 0.000000 0.000000
- Liquid1(C)[0](C) -> 0.000000 0.000000 0.000000 0.000000
- PLX(C)[0](C) -> 0.000000 0.000000 0.000000 0.000000
- Min(C)[0](C) -> 2140390.000000 35.354679 36.400101 35.673167
- GFXCLKFreq[0]() -> 55465844.000000 832.818054 1224.018799 924.430733
- PWM[0]() -> 0.000000 0.000000 0.000000 0.000000
- FANSpeed[RPM][0]() -> 0.000000 0.000000 0.000000 0.000000
- LimitPPT0(W)[0](W) -> 11700000.000000 195.000000 195.000000 195.000000
- ValuePPT0(W)[0](W) -> 825928.562500 10.208000 31.805269 13.765476
- GFXActivity(%)[0](%) -> 224570.906250 0.674110 16.070761 3.742848
- PCIeLinkSpeed(GT/s)[0](GT/s) -> 342500.000000 2.500000 8.000000 5.708333
- PCIeLinkWidth[0]() -> 960000.000000 16.000000 16.000000 16.000000
- PCIeCorrectableError[0]() -> 0.000000 0.000000 0.000000 0.000000
- PCIeUncorrectableError[0]() -> 0.000000 0.000000 0.000000 0.000000
- PCIeResidencyGen1(%)[0](%) -> 2802648.250000 38.709671 51.898739 46.710804
- PCIeResidencyGen2(%)[0](%) -> 0.000000 0.000000 0.000000 0.000000
- PCIeResidencyGen3(%)[0](%) -> 3197214.750000 48.101269 61.290329 53.286913
- PCIeResidencyGen4(%)[0](%) -> 0.000000 0.000000 0.000000 0.000000
- PCIeResidencyL0(%)[0](%) -> 6000000.000000 100.000000 100.000000 100.000000
- PCIeResidencyL0s(%)[0](%) -> 0.000000 0.000000 0.000000 0.000000
- PCIeResidencyL1(%)[0](%) -> 0.000000 0.000000 0.000000 0.000000
- FanPWMreading[%][0](%) -> 0.000000 0.000000 0.000000 0.000000
- mclk[0](MHz) -> 11934225.000000 101.000000 876.000000 198.903750
- sclk[0](MHz) -> 62049740.000000 798.000000 1856.000000 1034.162333
- Count 60000 lines spend time :667 (ms)
复制代码
作者: red2020 时间: 2020-2-27 10:45
回复 14# Gin_Q
学C语言要注重细节,化繁为简,代码要求精,必要的数学技巧也得学学。看得出你只是刚学会了语法,并没有掌握这门语言。
作者: red2020 时间: 2020-2-27 21:04
回复 16# Gin_Q
所以说细节很重要,只要能认真观察,发现事物的客观规律,你就会找到编程的捷径,同样一个功能实现的方法有很多种,善于对比,发觉其中的技巧,更有助于C水平的提升。
欢迎光临 批处理之家 (http://bbs.bathome.net/) |
Powered by Discuz! 7.2 |