[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖

[其他] 【已解决】gawk:如何优化代码减少文件读写次数

本帖最后由 思想之翼 于 2025-3-20 14:27 编辑

下列代码每个循环都会有多次磁盘IO,显然会影响性能,尤其是处理大量文件时。如何将这些中间步骤改为内存处理,通过管道或变量传递,而不是写入文件,从而提升运行速度?
  1. @echo off
     rem Repeats a five-stage gawk pipeline 500 times. The stages hand their
     rem results to each other through files under Z:\KZ\001.
  2. setlocal enabledelayedexpansion
  3.         for /l %%f in (1,1,500) do (
                 rem q receives the loop counter. Nothing in the visible script reads it.
  4.             set "q=%%f"
                 rem Stage 1: no input file is named, so gawk reads standard input.
                 rem Records are split on line breaks or single spaces and counted.
                 rem N holds ampersand-separated groups. Each group is a comma list of
                 rem counts, and an item may also be an a-b range. For every group, each
                 rem record whose occurrence count belongs to the group is written to
                 rem Z:/KZ/001/ followed by the group index and M.txt, which yields
                 rem 1M.txt and 2M.txt for the two groups given here.
  5.             gawk -v "N=35,37&38,40" -v "RS=\r?\n| " -v "ORS= " "{++d[$0]}END{split(N,g,/&/);for(l in g){split(g[l],s,/,/);delete A;for(i in s){split(s[i],r,/-/);a=r[1];b=r[2];if(b){for(j=a;j<=b;j++)A[j]}else{A[a]}};for(w in d)if(d[w] in A)print w>(\"Z:/KZ/001/\"l\"M.txt\")}}"
                 rem Stage 2: comb appends to str every ordering of the digits 1 to n,
                 rem with n set to 4, in which each digit occurs once. For each field of
                 rem 2M.txt the characters are then selected by the digits of str and
                 rem printed in space-terminated chunks of n characters into 4M.txt.
                 rem NOTE review: str is never reset, so it grows with every input record.
  6.             gawk "function comb(m,n,c,s,end,i,t,j,k){for(i=c;i<=m;i++){t=s\"\"B[i];if(length(t)==n){split(t,A,/./,C);k=1;for(j in C)A[C[j]]++;for(j in A){if(A[j]>1)k=0};if(k){u++;str=str\"\"t}}else{comb(m,n,c,t)}}}BEGIN{n=4;for(i=1;i<=n;i++)B[i]=i}{comb(n,n,1,\"\");split(str,A,/./,T);for(i=1;i<=NF;i++){split($i,A,/./,S);for(R in T){x++;v=v\"\"S[T[R]];if(x==n){printf(v\" \");x=0;v=\"\"}}}}" "Z:\KZ\001\2M.txt" > "Z:\KZ\001\4M.txt"
                 rem Stage 3: the same gawk program as stage 2, reading 4M.txt and
                 rem writing 6M.txt.
  7.             gawk "function comb(m,n,c,s,end,i,t,j,k){for(i=c;i<=m;i++){t=s\"\"B[i];if(length(t)==n){split(t,A,/./,C);k=1;for(j in C)A[C[j]]++;for(j in A){if(A[j]>1)k=0};if(k){u++;str=str\"\"t}}else{comb(m,n,c,t)}}}BEGIN{n=4;for(i=1;i<=n;i++)B[i]=i}{comb(n,n,1,\"\");split(str,A,/./,T);for(i=1;i<=NF;i++){split($i,A,/./,S);for(R in T){x++;v=v\"\"S[T[R]];if(x==n){printf(v\" \");x=0;v=\"\"}}}}" "Z:\KZ\001\4M.txt" > "Z:\KZ\001\6M.txt"
                 rem Stage 4: a is the length of the last record of 6M.txt, b is the
                 rem last record of 1M.txt and c is the length of b. The END rule takes
                 rem the length difference, divides it by 5 and appends that many
                 rem zero-padded four-digit counters, each followed by a space, to b.
                 rem The result is written to 3M.txt. The record b is used as part of
                 rem the printf format string.
  8.             gawk "NR==FNR{a=length($0);next} {b=$0;c=length(b)} END{d=a-c;n=int(d/5);s=\"\";for(i=0;i<n;i++)s=s sprintf(\"%%04d \",i%%10000);printf b s>\"Z:/KZ/001/\"\"3M.txt\"}" "Z:\KZ\001\6M.txt" "Z:\KZ\001\1M.txt"
                 rem Stage 5: tokens split on line breaks or single spaces are counted
                 rem per input file. A token that occurs more often in 3M.txt than in
                 rem 6M.txt is appended to N1.txt. In both remaining cases, fewer or
                 rem equal, it is appended to N2.txt.
                 rem NOTE review: the last two branches are identical. Confirm whether
                 rem equal counts were meant to go to a separate file.
  9.             gawk -v "RS=\r?\n| " "FNR==1{fn[++n]=FILENAME}{++a[$0][fn[n]]}END{for(i in a){if(a[i][fn[1]]>a[i][fn[2]]){printf\"%%s \",i>>\"Z:/KZ/001/\"\"N1.txt\"}else if(a[i][fn[1]]<a[i][fn[2]]){printf\"%%s \",i>>\"Z:/KZ/001/\"\"N2.txt\"}else{printf\"%%s \",i>>\"Z:/KZ/001/\"\"N2.txt\"}}}" "Z:\KZ\001\3M.txt" "Z:\KZ\001\6M.txt"
  10.         )
  11. endlocal
复制代码
试写了个脚本。
  1. @echo off
  2. setlocal enabledelayedexpansion
     rem Attempt to run the five gawk stages as one nested pipeline, 500 times,
     rem so that intermediate results travel through pipes instead of files.
     rem
     rem Stage 1 tags each selected record with 1M: or 2M: according to its group.
     rem The for /f segment routes the two tags: 1M lines are stored in M1_part,
     rem 2M lines are echoed on to the two comb stages. The padding stage and the
     rem final comparison stage follow, the latter appending to N1.txt, N2.txt
     rem and N3.txt under Z:/KZ/003.
     rem
     rem Fix applied here: the two printf formats of stage 1 are written with a
     rem doubled percent sign before the s. In a batch file a single percent sign
     rem starts a variable reference, so the text between the two single percent
     rem signs was stripped before gawk ever saw the program. The later stages of
     rem this script already double their percent signs.
     rem
     rem All comments are kept up here on purpose. A rem placed inside one of the
     rem piped blocks below can swallow the commands that follow it once cmd
     rem re-serialises the block for the child process.
     rem
     rem NOTE review: call :Process targets a label that this script does not define.
     rem NOTE review: M1_part is set inside a pipe segment. Pipe segments presumably
     rem run in child cmd processes, so the later echo may not see the value. Confirm.
     rem NOTE review: the padding stage uses the NR==FNR two-file idiom but is given
     rem standard input only.
     rem NOTE review: the last stage still reads Z:/KZ/003/6M.txt from disk, which
     rem nothing in this script writes.
  3. for /l %%f in (1,1,500) do (
  4.     set "q=%%f"
  5.     "Z:/KZ/003/gawk.exe" -v "N=33,35,37&38,40" -v "RS=\r?\n| " -v "ORS= " "{++d[$0]}END{split(N,g,/&/);for(l in g){split(g[l],s,/,/);delete A;for(i in s){split(s[i],r,/-/);a=r[1];b=r[2];if(b){for(j=a;j<=b;j++)A[j]}else{A[a]}};for(w in d)if(d[w] in A)printf(l==1?\"1M:%%s \":\"2M:%%s \",w)}}" 2>nul | (
  6.         (for /f "tokens=1* delims=:" %%a in ('findstr /b "1M: 2M:"') do @if "%%a"=="1M" (set "M1_part=%%b" & call :Process) else echo %%b) | (
  7.             "Z:/KZ/003/gawk.exe" "function comb(m,n,c,s,end,i,t,j,k){for(i=c;i<=m;i++){t=s\"\"B[i];if(length(t)==n){split(t,A,/./,C);k=1;for(j in C)A[C[j]]++;for(j in A){if(A[j]>1)k=0};if(k){u++;str=str\"\"t}}else{comb(m,n,c,t)}}}BEGIN{n=4;for(i=1;i<=n;i++)B[i]=i}{comb(n,n,1,\"\");split(str,A,/./,T);for(i=1;i<=NF;i++){split($i,A,/./,S);for(R in T){x++;v=v\"\"S[T[R]];if(x==n){printf(v\" \");x=0;v=\"\"}}}}" | (
  8.                 "Z:/KZ/003/gawk.exe" "function comb(m,n,c,s,end,i,t,j,k){for(i=c;i<=m;i++){t=s\"\"B[i];if(length(t)==n){split(t,A,/./,C);k=1;for(j in C)A[C[j]]++;for(j in A){if(A[j]>1)k=0};if(k){u++;str=str\"\"t}}else{comb(m,n,c,t)}}}BEGIN{n=4;for(i=1;i<=n;i++)B[i]=i}{comb(n,n,1,\"\");split(str,A,/./,T);for(i=1;i<=NF;i++){split($i,A,/./,S);for(R in T){x++;v=v\"\"S[T[R]];if(x==n){printf(v\" \");x=0;v=\"\"}}}}" | (
  9.                     (cmd /v /c "echo(!M1_part!" & echo() | "Z:/KZ/003/gawk.exe" "NR==FNR{a=length($0);next} {b=$0;c=length(b)} END{d=a-c;n=int(d/5);s=\"\";for(i=0;i<n;i++)s=s sprintf(\"%%04d \",i%%10000);printf b s}" | (
  10.                         "Z:/KZ/003/gawk.exe" -v "RS=\r?\n| " "FNR==1{fn[++n]=FILENAME}{++a[$0][fn[n]]}END{for(i in a){if(a[i][fn[1]]>a[i][fn[2]]){printf\"%%s \",i>>\"Z:/KZ/003/N1.txt\"}else if(a[i][fn[1]]<a[i][fn[2]]){printf\"%%s \",i>>\"Z:/KZ/003/N2.txt\"}else{printf\"%%s \",i>>\"Z:/KZ/003/N3.txt\"}}}" - "Z:/KZ/003/6M.txt"
  11.                     )
  12.                 )
  13.             )
  14.         )
  15.     )
  16. )
  17. pause
复制代码

为啥前面换行时有 ^ 符号,后面的就没了?
建议老老实实全放在一行,或者单独弄成一个 awk 文件。这样分开来写,感觉就像在跟 cmd 解析器和解析规则斗智斗勇,浪费精力。

TOP

返回列表