批处理之家 - Powered by Discuz! Board

function getDic() {
    <#
    .SYNOPSIS
    Extracts the "text_3" entries of an XHTML file into a plain-text list.

    .DESCRIPTION
    Reads "<filename>.xhtml" as UTF-8, puts every
    <span class="text_3">...</span><span class="text_1">(...)</span> pair on a
    line of its own, keeps only the lines that contain "text_3", strips every
    markup tag from those lines and writes the result to "<filename>.txt".
    If the input file does not exist, a message is printed and no output file
    is created.

    .PARAMETER filename
    Path of the input file WITHOUT its ".xhtml" extension; the same base path
    is used for the ".txt" output.
    #>
    param(
        [string]$filename
    )
    $inPath = $filename + ".xhtml"
    $outPath = $filename + ".txt"

    # Without this guard a missing input only raises a non-terminating error
    # and an empty .txt file is still written.
    if (-not (Test-Path -LiteralPath $inPath -PathType Leaf)) {
        Write-Host ('"' + $inPath + '" path error or not exist')
        return
    }

    $entryPattern = '(<span class="text_3">[^<>]+</span><span class="text_1">[(][^()]+[)]</span>)'

    # -LiteralPath keeps characters such as [ and ] in the file name from being
    # interpreted as wildcards.
    $content = (Get-Content -LiteralPath $inPath -Encoding UTF8) -join "`n"

    # Surround each entry with newlines so that it ends up on its own line.
    $lines = ($content -replace $entryPattern, "`n`$1`n") -split "`n"

    # Keep the lines mentioning text_3, then drop all tags from them.
    $entries = ($lines -match "text_3") -replace "<[^<>]+>", ""

    $entries | Out-File -LiteralPath $outPath -Encoding utf8
}
# Run the extraction for two inputs. Each argument is a base path without
# extension: getDic reads "<base>.xhtml" and writes "<base>.txt".
getDic ".\juans\F28n1071_005"
getDic ".\juans\005"

复制代码

<# :
cls&echo off&cd /d "%~dp0"&mode con lines=5000&rem save this bat file with ANSI/GB2312 encoding
path %SYSTEMROOT%\System32\WindowsPowerShell\v1.0;%path%
powershell -NoProfile -ExecutionPolicy bypass "Get-Content -literal \"%~f0\"|Out-String|Invoke-Expression"
pause
exit
#>
# Batch/PowerShell polyglot: cmd runs the header above, which feeds this very
# file to PowerShell; PowerShell sees the header as a block comment.
#
# For every *.xhtml file in $folder1 the script
#   1. collects the text that follows each class="text_3"> marker,
#   2. derives the counterpart file in $folder2 from the trailing
#      "_<digits>.xhtml" part of the file name,
#   3. walks the class="text_3"> occurrences of the counterpart in order and,
#      when the counterpart text contains the source text at the same position,
#      replaces the counterpart text with the source text,
#   4. writes "<BaseName>.txt" into $folder1 with one line per source entry:
#      an empty line for a replaced entry, the source text otherwise.

# Folder holding the source .xhtml files.
$folder1="D:\大藏经修改\梵文";
# Folder holding the counterpart .xhtml files that get rewritten in place.
$folder2="D:\大藏经修改\epub解包后\F1071 釋教最上乘秘密藏陀羅尼集\OEBPS\juans";
if(-not (test-path -literal $folder1)){write-host ('"'+$folder1+'" path error or not exist');exit;}
if(-not (test-path -literal $folder2)){write-host ('"'+$folder2+'" path error or not exist');exit;}

# UTF-8 without a byte order mark, used for all reads and writes.
$enc=New-Object System.Text.UTF8Encoding $False;
$files=@(dir -literal $folder1|?{('.xhtml' -eq $_.Extension) -and ($_ -is [System.IO.FileInfo])});
if($files.length -ge 1){
    for($i=0;$i -lt $files.length;$i++){
        write-host $files[$i].FullName -ForegroundColor yellow;
        # Report lines for the current source file.
        $arr=New-Object -TypeName System.Collections.ArrayList;
        $text1=[IO.File]::ReadAllText($files[$i].FullName, $enc);
        $m1=[regex]::matches($text1, 'class="text_3">([^<]+)');

        # Position of the next source entry to pair. It must be reset for every
        # file: the tail loop below reads it even when no counterpart file is
        # processed, and a stale value from the previous file would silently
        # drop the first entries from the report.
        # It lives in the global scope so the replace callback can increment it.
        $global:n=0;

        $m2=[regex]::match($files[$i].Name, '(?i)_([0-9]+\.xhtml)$');
        if($m2.Success){
            $juansfile=$folder2.trimend('\')+'\'+$m2.groups[1].value;
            if(test-path -literal $juansfile){
                write-host $juansfile -ForegroundColor yellow;
                $text2=[IO.File]::ReadAllText($juansfile, $enc);
                # The script block is invoked once per match, in document order.
                $text2=[regex]::replace($text2, 'class="text_3">([^<]+)', {
                    param($m3);
                    # Default: leave the counterpart text untouched.
                    $str=$m3.groups[0].value;
                    if($global:n -lt $m1.count){
                        if($m3.groups[1].value.Contains($m1[$global:n].groups[1].value)){
                            # Counterpart contains the source text: take the source version.
                            $str=$m1[$global:n].groups[0].value;
                            [void]$arr.add('');
                            write-host ($m1[$global:n].groups[1].value+' --> '+$m3.groups[1].value);
                        }else{
                            # No containment: report the source text instead.
                            [void]$arr.add($m1[$global:n].groups[1].value);
                        }
                    }
                    $global:n++;
                    return $str;
                });
                [IO.File]::WriteAllText($juansfile, $text2, $enc);
            }else{write-host ('"'+$juansfile+'" not exist');}
        }

        # Source entries that had no counterpart occurrence are reported as-is.
        for($j=$global:n;$j -lt $m1.count;$j++){
            [void]$arr.add($m1[$j].groups[1].value);
        }
        $outfile=$folder1.trimend('\')+'\'+$files[$i].BaseName+'.txt';
        [IO.File]::WriteAllLines($outfile, $arr, $enc);
        write-host '';
    }
}else{write-host ('no xhtmlfile in "'+$folder1+'"');}

复制代码