楼主的代码果然厉害, 又简短, 又高效, 学习!
对于 findstr, 我尚在学习; 一开始的思路也想到了它, 但终因不熟悉而换了方向
以下所有代码中, delims= 之后直到 " 之前的若干空格, 实际上都应是一个 TAB 字符加一个半角空格; 复制代码后请自行替换回来。看了 14 楼的提醒才知道论坛有这个问题
效率版(效率优先, 对 a.txt 文件格式变动适应差, 代码简洁度其次考虑)
@echo off & setlocal enabledelayedexpansion
rem Efficiency-first variant: reads a.txt line by line and writes extracted fields to b.txt.
rem State is kept in four variables: doneHead, lnBeratedOffs, search and the accumulator ln.
rem NOTE: per the accompanying post, the blank ending each delims list was originally
rem a TAB followed by a space before the forum altered it. Restore the TAB after copying.
rem The outer loop skips the first 69 lines, ignores lines that start with an opening
rem square bracket, and keeps every remaining line whole in loop variable a.
for /f "eol=[ skip=69 delims=" %%a in (a.txt) do (
  if "!doneHead!"=="" (
    rem Header phase, active while doneHead is still empty.
    for /f "tokens=1-4 delims=></ " %%b in ("%%a") do (
      rem Tokens 1 to 4 land in loop variables b, c, d and e.
      rem When token 2 is the class time attribute, tokens 3 and 4 are appended to ln.
      if /i "%%c"=="class="time"" (set ln=!ln! %%d %%e) else (
        rem A first token of tbody, compared case-sensitively, ends the header:
        rem ln minus its first character overwrites b.txt, doneHead becomes done
        rem and the line counter is set to -3.
        if "%%b"=="tbody" (echo !ln:~1!>b.txt)&(set doneHead=done)&(set lnBeratedOffs=-3) else (
          rem Otherwise a first token of td appends token 2 to ln.
          if /i "%%b"=="td" set ln=!ln! %%c
  ) ) ) ) else ( set /a "lnBeratedOffs+=1"
    rem Body phase: the counter was just incremented for this line and its value
    rem selects what is extracted from the line.
    if !lnBeratedOffs! equ 0 (
      rem Counter 0: a first token of /table finishes the run. b.txt is opened with start,
      rem compared with the sample file by comp using the /l switch when that file exists,
      rem and the script exits. Any other line resets ln to its token 7, splitting on
      rem ampersand, angle brackets, question mark and space.
      for /f "tokens=1 delims=<> " %%b in ("%%a") do if /i "%%b"=="/table" (
        (start b.txt)&(if exist b样本.txt comp b样本.txt b.txt /l)& exit /b
      ) else for /f "tokens=7 delims=&>?< " %%z in ("%%a") do set ln=%%z
    ) else if !lnBeratedOffs! equ 2 (
      rem Counter 2: set search to sTime and append the site URL followed by token 8,
      rem split on equals sign and space, with surrounding quotes removed by the tilde modifier.
      (set search=sTime)&(for /f "tokens=8 delims== " %%b in ("%%a") do set ln=!ln! http://bbs.bathome.net/%%~b)
    ) else if !lnBeratedOffs! geq 6 (
      rem Counter 6 and above: the search state decides which field this line may supply.
      if "!search!"=="sTime" (
        for /f "tokens=2,6,10,11 delims=<> " %%b in ("%%a") do (
          rem Tokens 2, 6, 10 and 11 land in b, c, d and e. When token 2 is the class time
          rem attribute, ln gains token 6 followed by tokens 10 and 11 joined with a hyphen.
          rem NOTE review: confirm whether the set search=bold after the ampersand
          rem is governed by the if or runs for every line reaching this point.
          if /i "%%b"=="class="time"" (set ln=!ln! %%c %%d-%%e)&set search=bold)
      ) else if "!search!"=="bold" (
        rem bold: advance search to cause and append tokens 2 and 5 joined together.
        (set search=cause)&(for /f "tokens=2,5 delims=<> " %%b in ("%%a") do set ln=!ln! %%b%%c)
      ) else if "!search!"=="cause" (
        rem cause: set search to wait, set the counter back to -3 and append
        rem ln followed by token 2 as one line to b.txt.
        (set search=wait)&(set lnBeratedOffs=-3)&(for /f "tokens=2 delims=<> " %%b in ("%%a") do echo !ln! %%b>>b.txt)
) ) ) )
简洁版(以代码简洁优先, 效率及适应性考虑次之)
@echo off & setlocal enabledelayedexpansion
rem Concise variant: reads a.txt line by line and writes extracted fields to b.txt.
rem State is kept in doneHead and in the accumulator ln.
rem NOTE: per the accompanying post, the blank ending each delims list was originally
rem a TAB followed by a space before the forum altered it. Restore the TAB after copying.
rem The outer loop skips the first 69 lines, ignores lines that start with an opening
rem square bracket, and keeps every remaining line whole in loop variable l.
for /f "eol=[ skip=69 delims=" %%l in (a.txt) do (
  if "!doneHead!"=="" (
    rem Header phase, active while doneHead is still empty.
    for /f "tokens=1-4 delims=></ " %%A in ("%%l") do (
      rem Tokens 1 to 4 land in A, B, C and D. A first token of tbody sets doneHead
      rem to done and overwrites b.txt with ln minus its first character.
      if /i "%%A"=="tbody" (set doneHead=done)&(echo !ln:~1!>b.txt) else (
        rem Otherwise a class time attribute in token 2 appends tokens 3 and 4 to ln,
        rem and failing that a first token of td appends token 2.
        if /i "%%B"=="class="time"" (set ln=!ln! %%C %%D) else if /i "%%A"=="td" (set ln=!ln! %%B)
    ) )
  ) else for /f "tokens=1-17 delims==&?></ " %%A in ("%%l") do (
    rem Body phase: up to 17 tokens land in A through Q, split on equals sign, ampersand,
    rem question mark, angle brackets, slash and space. The tests below are independent
    rem statements, each inspecting fixed token positions of the same line.
    rem A first token of tbody leads to the finish sequence: open b.txt with start, compare it
    rem with the sample file by comp using the /l switch when that file exists, then exit.
    if /i "%%A"=="tbody" (start b.txt)&(if exist b样本.txt comp b样本.txt b.txt /l)& exit /b
    rem Token 6 equal to berated resets ln to token 11 followed by the site URL.
    if /i "%%F"=="berated" (set ln=%%K http://bbs.bathome.net/)
    rem When tokens 11 and 12, or else tokens 12 and 13, spell ahref, the token right after
    rem them is appended to ln with surrounding quotes removed by the tilde modifier.
    if /i "%%K%%L"=="ahref" (set ln=!ln!%%~M) else if /i "%%L%%M"=="ahref" (set ln=!ln!%%~N)
    rem A quoted time in token 3 appends token 13, then tokens 16 and 17 joined with a hyphen.
    if /i "%%C"==""time"" (set ln=!ln! %%M %%P-%%Q)
    rem A quoted bold in token 5 appends tokens 2 and 6 joined together.
    if /i "%%E"==""bold"" (set ln=!ln! %%B%%F)
    rem A third token of td appends ln followed by token 2 as one line to b.txt.
    if /i "%%C"=="td" echo !ln! %%B>>b.txt
) )
适应版(起始不采用跳过指定行数, 而是根据搜索特定字符串找到记录表头, 适应 a.txt 文件格式变动略好)
@echo off & setlocal enabledelayedexpansion
rem Adaptive variant: instead of skipping a fixed number of lines it scans a.txt for a
rem marker token, then extracts fields line by line into b.txt.
rem State is kept in doneHead, which goes from empty to doing to done, in the line
rem counter lnBeratedOffs and in the accumulator ln.
rem NOTE: per the accompanying post, the blank ending each delims list was originally
rem a TAB followed by a space before the forum altered it. Restore the TAB after copying.
rem The outer loop ignores lines that start with an opening square bracket and keeps
rem every remaining line whole in loop variable a.
for /f "eol=[ delims=" %%a in (a.txt) do (
  if "!doneHead!"=="" (
    rem Search phase: the second space-separated token equal to the summary ratelogviewer
    rem attribute switches doneHead to doing.
    for /f "tokens=2 delims= " %%b in ("%%a") do if /i "%%b"=="summary="ratelogviewer"" (set doneHead=doing)
  ) else if /i "!doneHead!"=="doing" (
    rem Header phase: tokens 1 to 4 land in b, c, d and e.
    for /f "tokens=1-4 delims=></ " %%b in ("%%a") do (
      rem When token 2 is the class time attribute, tokens 3 and 4 are appended to ln.
      if /i "%%c"=="class="time"" (set ln=!ln! %%d %%e) else (
        rem A first token of tbody, compared case-sensitively, ends the header:
        rem ln minus its first character overwrites b.txt, doneHead becomes done
        rem and the line counter is set to -3.
        if "%%b"=="tbody" (echo !ln:~1!>b.txt)&(set doneHead=done)&(set lnBeratedOffs=-3) else (
          rem Otherwise a first token of td appends token 2 to ln.
          if /i "%%b"=="td" set ln=!ln! %%c
    ) ) )
  ) else ( set /a "lnBeratedOffs+=1"
    rem Body phase: the counter was just incremented for this line and its value
    rem selects what is extracted from the line.
    if !lnBeratedOffs! equ 0 (
      rem Counter 0: tokens 4 and 7 land in b and c, and ln is reset to token 7.
      for /f "tokens=4,7 delims=&>?< " %%b in ("%%a") do (set ln=%%c)
    ) else if !lnBeratedOffs! equ 2 (
      rem Counter 2: append the site URL followed by token 8, split on equals sign
      rem and space, with surrounding quotes removed by the tilde modifier.
      for /f "tokens=8 delims== " %%b in ("%%a") do (set ln=!ln! http://bbs.bathome.net/%%~b)
    ) else if !lnBeratedOffs! geq 6 (
      rem Counter 6 and above: tokens 1 to 6 land in b through g, tokens 10 and 11 in h and i.
      for /f "tokens=1-6,10,11 delims=<> " %%b in ("%%a") do (
        rem Class time attribute in token 2: append token 6, then tokens 10 and 11
        rem joined with a hyphen.
        if /i "%%c"=="class="time"" (set ln=!ln! %%g %%h-%%i) else (
          rem Class bold attribute in token 4: append tokens 2 and 5 joined together.
          if /i "%%e"=="class="bold"" (set ln=!ln! %%c%%f) else (
            rem Token 3 equal to /td: append ln followed by token 2 as one line to b.txt.
            if /i "%%d"=="/td" (echo !ln! %%c>>b.txt) else (
              rem A first token of tr sets the counter to -1. A first token of /tbody
              rem finishes the run: open b.txt with start, compare it with the sample file
              rem by comp using the /l switch when that file exists, then exit.
              if /i "%%b"=="tr" (set /a "lnBeratedOffs=-1") else if /i "%%b"=="/tbody" (
                (start b.txt)&(if exist b样本.txt comp b样本.txt b.txt /l)& exit /b
) ) ) ) ) ) ) )
[ 本帖最后由 neorobin 于 2009-12-18 15:22 编辑 ] |