批处理之家 - Powered by Discuz! Board

# Script 1.
# For every line of a.txt that starts with "CB", take the characters found between
# the first pair of square brackets, translate each one to its code through IDS.txt
# (column 2 -> column 1) and keep the code only if it also appears as the first
# column of dump.txt. A line is written to b.txt, followed by the tab separated
# codes, only when every one of its characters produced a code.
$file1 = 'E:\Test\大藏经解析\IDS.txt';
$file2 = 'E:\Test\大藏经解析\dump.txt';
$file3 = 'E:\Test\大藏经解析\a.txt';
$file4 = 'E:\Test\大藏经解析\b.txt';

# $hash1: character (IDS.txt column 2) -> code (IDS.txt column 1); first occurrence wins.
$hash1 = @{};
foreach( $row in @([IO.File]::ReadAllLines($file1) -match '^u') ){
    $m = $row.Trim() -split '\s+';
    # A one-column line has no character; a null key would make ContainsKey throw.
    if( $m.Count -lt 2 ){ continue; }
    if( !$hash1.ContainsKey($m[1]) ){ $hash1[$m[1]] = $m[0]; }
}

# $hash2: set of codes that occur as the first column of dump.txt.
$hash2 = @{};
foreach( $row in @([IO.File]::ReadAllLines($file2) -match '^u') ){
    $m = $row.Trim() -split '\s+';
    if( !$hash2.ContainsKey($m[0]) ){ $hash2[$m[0]] = 1; }
}

# Matches one character at a time, keeping a surrogate pair together so that
# characters outside the Basic Multilingual Plane are looked up as a whole.
$charPattern = '[\ud800-\udbff][\udc00-\udfff]|[\s\S]';

$arr = @([IO.File]::ReadAllLines($file3) -match '^CB');
# Wrapping the loop in @() guarantees an array even when nothing qualifies;
# WriteAllLines rejects a null argument.
[string[]]$out = @(foreach( $line in $arr ){
    $inner = ($line -split '[\[\]]')[1]; # text between the first '[' and ']'
    # No bracketed characters: nothing to resolve, so do not emit the line.
    if( [string]::IsNullOrEmpty($inner) ){ continue; }
    $chs = @(foreach( $hit in [regex]::Matches($inner, $charPattern) ){ $hit.Value; });
    [Collections.ArrayList]$a = @();
    foreach( $s in $chs ){
        if( !$hash1.ContainsKey($s) ){ continue; }
        $key = $hash1[$s];
        if( $hash2.ContainsKey($key) ){ [void]$a.Add($key); }
    }
    # Emit the line only when every character resolved to a code known to dump.txt.
    if( $a.Count -eq $chs.Count ){
        $line + "`t" + ($a -join "`t");
    }
});
[IO.File]::WriteAllLines($file4, $out);
echo 'Done';
[Console]::ReadKey();

复制代码

# Script 2.
# For every "CB" line of a.txt, translate the characters between the first pair of
# square brackets to codes through IDS.txt, then find every dump.txt entry whose
# second column contains all of those codes. Matching lines are echoed and written
# to b.txt followed by "code-character" pairs for each matching dump.txt entry.
$file1 = 'E:\Test\大藏经解析\IDS.txt';
$file2 = 'E:\Test\大藏经解析\dump.txt';
$file3 = 'E:\Test\大藏经解析\a.txt';
$file4 = 'E:\Test\大藏经解析\b.txt';

# Separators are passed as explicit char[] values. Windows PowerShell converts a
# string argument to char[] for String.Split, but on PowerShell 7 / modern .NET a
# string argument binds to the Split(string, ...) overloads and is then treated as
# ONE literal separator, which silently changes the result.
$columnSeparators = [char[]]"`t ";
$brackets = [char[]]'[]';
$noEmpty = [StringSplitOptions]::RemoveEmptyEntries;
# One character per match, keeping surrogate pairs together.
$charPattern = '[\ud800-\udbff][\udc00-\udfff]|[\s\S]';

# Read IDS.txt line by line into two dictionaries.
$Dic1 = New-Object 'Collections.Generic.Dictionary[string, string]'; # column 2 (character) -> column 1 (code)
$Dic3 = New-Object 'Collections.Generic.Dictionary[string, string]'; # column 1 (code) -> column 2 (character)
foreach( $row in @([IO.File]::ReadAllLines($file1) -match '^u') ){
    $cols = $row.Trim().Split($columnSeparators, 2, $noEmpty); # split into 2 columns
    if( $cols.Count -lt 2 ){ continue; } # no character column on this line
    if( !$Dic1.ContainsKey($cols[1]) ){ $Dic1.Add($cols[1], $cols[0]); }
    if( !$Dic3.ContainsKey($cols[0]) ){ $Dic3.Add($cols[0], $cols[1]); }
}

# Read dump.txt line by line; lines starting with u3013 are dropped.
# $Dic2: space separated codes found in column 2 -> code in column 1.
$Dic2 = New-Object 'Collections.Generic.Dictionary[string, string]';
$dumpLines = @([IO.File]::ReadAllLines($file2) -match '^u[0-9a-f]+\s+.*u[0-9a-f]+' -notMatch '^u3013\s');
foreach( $row in $dumpLines ){
    $cols = $row.Trim().Split($columnSeparators, 2, $noEmpty); # split into 2 columns
    if( $cols.Count -lt 2 ){ continue; }
    $k = foreach( $m in [regex]::Matches($cols[1], '(?i)u[0-9a-f]+') ){
        $m.Groups[0].Value;
    }
    $key = $k -join ' ';
    if( !$Dic2.ContainsKey($key) ){ $Dic2.Add($key, $cols[0]); }
}

# Walk a.txt; a line is written to b.txt when some dump.txt entry contains all of its codes.
$fsw = New-Object System.IO.StreamWriter($file4, $false, [Text.Encoding]::UTF8);
try {
    foreach( $line in @([IO.File]::ReadAllLines($file3) -match '^CB') ){
        $inner = $line.Split($brackets)[1]; # characters between the brackets
        # Without characters the combined regex would be empty and match every key.
        if( [string]::IsNullOrEmpty($inner) ){ continue; }
        $chs = @(foreach( $hit in [regex]::Matches($inner, $charPattern) ){ $hit.Value; });

        # One look-ahead per character; joined together they require all codes at once.
        $lookaheads = New-Object Collections.ArrayList;
        foreach( $ch in $chs ){
            if( !$Dic1.ContainsKey($ch) ){ break; }
            # Escape the code so it is always matched literally.
            [void]$lookaheads.Add( '(?=.*' + [regex]::Escape($Dic1[$ch]) + '\b)' );
        }
        if( $lookaheads.Count -ne $chs.Count ){ continue; } # some character has no code

        $reg = $lookaheads -join '';
        $seen = @{}; # de-duplicates the dump.txt first-column codes
        # The list keeps the codes in dump.txt order; Hashtable enumeration order is arbitrary.
        $hits = New-Object 'Collections.Generic.List[string]';
        foreach( $key in ($Dic2.Keys -match $reg) ){
            $code = $Dic2[$key];
            if( !$seen.ContainsKey($code) ){
                $seen[$code] = 1;
                $hits.Add($code);
            }
        }
        if( $hits.Count -eq 0 ){ continue; }

        $s = $hits -join "`t";
        # Turn every code into "code-character".
        $s = [regex]::Replace($s, '\S+', {param($m); $m.Value + '-' + $Dic3[$m.Value]});
        echo( $line + "`t" + $s );
        $fsw.WriteLine( $line + "`t" + $s );
        $fsw.Flush(); # keep b.txt current while the script is still running
    }
} finally {
    # Release the file handle even when an error interrupts the loop.
    $fsw.Dispose();
}
echo 'Done';
[Console]::ReadKey();

复制代码

<# :
cls
@echo off
cd /d "%~dp0"
powershell -NoProfile -ExecutionPolicy bypass "Invoke-Command -ScriptBlock ([ScriptBlock]::Create([IO.File]::ReadAllText('%~f0',[Text.Encoding]::GetEncoding('GB2312')))) -Args '%~f0'"
pause
exit
#>
# Batch/PowerShell hybrid. cmd.exe runs the lines inside the block comment above;
# they re-read this very file (decoded as GB2312) and run it as a PowerShell script
# block with the script's full path as $args[0]. Keep comments in this file ASCII
# so they survive that decoding.
#
# For every line of a.txt the characters of the second column (surrounding square
# brackets stripped) are translated to codes through IDS.txt. The sorted,
# ':'-joined codes are looked up in an index built from dump.txt, and every hit is
# appended to the line as " code(character)". All lines are echoed and written to
# the result file, whether or not anything was appended.
$file1=".\a.txt";
$file2=".\IDS.txt";
$file3=".\dump.txt";
$file4=".\结果.txt";
$self=get-item -liter $args[0];
$path=$self.Directory.FullName;
# '$' is special in a -replace replacement string; double it so a directory name
# that contains '$' is inserted literally.
$pathRepl=$path.Replace('$','$$');
$file1=$file1 -replace '^\.',$pathRepl;
$file2=$file2 -replace '^\.',$pathRepl;
$file3=$file3 -replace '^\.',$pathRepl;
$file4=$file4 -replace '^\.',$pathRepl;
if(-not (test-path -liter $file1)){write-host ('"'+$file1+'" not found');exit;};
if(-not (test-path -liter $file2)){write-host ('"'+$file2+'" not found');exit;};
if(-not (test-path -liter $file3)){write-host ('"'+$file3+'" not found');exit;};
$enc=[Text.Encoding]::UTF8;
$text1=[IO.File]::ReadAllLines($file1, $enc);
$text2=[IO.File]::ReadAllLines($file2, $enc);
$text3=[IO.File]::ReadAllLines($file3, $enc);
write-host 'Laoding……';
# $dic1: IDS.txt column 2 -> column 1, $dic2: column 1 -> column 2 (first occurrence wins).
$dic1=New-Object 'System.Collections.Generic.Dictionary[string,string]';
$dic2=New-Object 'System.Collections.Generic.Dictionary[string,string]';
for($i=0;$i -lt $text2.count;$i++){
    # '\s+' so that several separators in a row do not leave leading whitespace
    # in the second column, which is used as a lookup key.
    $line=$text2[$i].trim() -split '\s+',2;
    if(-not $dic1.ContainsKey($line[1])){$dic1.add($line[1], $line[0])};
    if(-not $dic2.ContainsKey($line[0])){$dic2.add($line[0], $line[1])};
};
# $dic3: sorted, ':'-joined codes found in dump.txt column 2 -> list of column 1 values.
$dic3=New-Object 'System.Collections.Generic.Dictionary[string,object]';
for($i=0;$i -lt $text3.count;$i++){
    $line=$text3[$i].trimstart() -split '\s+',2;
    $m=[regex]::matches($line[1], '(?i)u[\da-f]+');
    if($m.count -ge 1){
        [System.Collections.ArrayList]$crr=@();
        foreach($k in $m){
            [void]$crr.add($k.groups[0].value);
        };
        # Sorting makes the key independent of the order of the codes on the line.
        $drr=@($crr|Sort-Object);
        $tmpline=$drr -join ':';
        if(-not $dic3.ContainsKey($tmpline)){
            [System.Collections.ArrayList]$key=@();
            $dic3.add($tmpline, $key);
        };
        if($dic3[$tmpline] -notcontains $line[0]){
            [void]$dic3[$tmpline].add($line[0]);
        };
    };
};
write-host 'Searching……';
$fs=New-Object System.IO.FileStream($file4, [System.IO.FileMode]::Create);
$sw=New-Object System.IO.StreamWriter($fs, $enc);
try{
    for($i=0;$i -lt $text1.count;$i++){
        $line=$text1[$i].trim() -split '\s+',2;
        [System.Collections.ArrayList]$arr=@();
        # A blank or one-column line has no second column. Calling trim() on it
        # would fail and leave $m holding the previous line's matches.
        if($line.count -ge 2){
            # One match per character; surrogate pairs are kept together.
            $m=[regex]::matches($line[1].trim('[]'),'[\ud800-\udbff][\udc00-\udfff]|[\u0000-\uffff]');
            foreach($k in $m){
                # Characters without an IDS.txt entry are skipped.
                if($dic1.ContainsKey($k.groups[0].value)){
                    [void]$arr.add($dic1[$k.groups[0].value]);
                };
            };
        };
        $line=$text1[$i];
        if($arr.count -ge 1){
            $err=@($arr|Sort-Object);
            $tmpline=$err -join ':';
            if($dic3.ContainsKey($tmpline)){
                for($j=0;$j -lt $dic3[$tmpline].count;$j++){
                    if($dic2.ContainsKey($dic3[$tmpline][$j])){
                        $line+=(' '+$dic3[$tmpline][$j]+'('+$dic2[$dic3[$tmpline][$j]]+')')
                    };
                };
            };
        };
        write-host $line;
        $sw.WriteLine($line);
        $sw.Flush();
    };
}finally{
    # Close the output even when an error interrupts the loop.
    $sw.Close();
    $fs.Close();
};

复制代码

# Script 4.
# Same task as the look-ahead regex variant, using plain string search instead:
# for every "CB" line of a.txt, translate the bracketed characters to codes through
# IDS.txt and collect every dump.txt entry whose second column contains all of
# those codes. Matching lines are echoed and written to b.txt followed by
# "code-character" pairs.
$file1 = 'E:\Test\大藏经解析\IDS.txt';
$file2 = 'E:\Test\大藏经解析\dump.txt';
$file3 = 'E:\Test\大藏经解析\a.txt';
$file4 = 'E:\Test\大藏经解析\b.txt';

# Separators are passed as explicit char[] values. Windows PowerShell converts a
# string argument to char[] for String.Split, but on PowerShell 7 / modern .NET a
# string argument binds to the Split(string, ...) overloads and is then treated as
# ONE literal separator.
$columnSeparators = [char[]]"`t ";
$brackets = [char[]]'[]';
$noEmpty = [StringSplitOptions]::RemoveEmptyEntries;
# One character per match, keeping surrogate pairs together.
$charPattern = '[\ud800-\udbff][\udc00-\udfff]|[\s\S]';

# Read IDS.txt line by line into two dictionaries.
$Dic1 = New-Object 'Collections.Generic.Dictionary[string, string]'; # column 2 (character) -> column 1 (code)
$Dic3 = New-Object 'Collections.Generic.Dictionary[string, string]'; # column 1 (code) -> column 2 (character)
foreach( $row in @([IO.File]::ReadAllLines($file1) -match '^u') ){
    $cols = $row.Trim().Split($columnSeparators, 2, $noEmpty); # split into 2 columns
    if( $cols.Count -lt 2 ){ continue; } # no character column on this line
    if( !$Dic1.ContainsKey($cols[1]) ){ $Dic1.Add($cols[1], $cols[0]); }
    if( !$Dic3.ContainsKey($cols[0]) ){ $Dic3.Add($cols[0], $cols[1]); }
}

# Read dump.txt line by line; lines starting with u3013 are dropped.
# $Dic2: codes found in column 2 -> code in column 1. The key is the codes joined
# by single spaces and padded with one space on each side, so that a code can be
# searched for as a whole token (' u2a6d ' is not found inside ' u2a6d6 ').
$Dic2 = New-Object 'Collections.Generic.Dictionary[string, string]';
$dumpLines = @([IO.File]::ReadAllLines($file2) -match '^u[0-9a-f]+\s+.*u[0-9a-f]+' -notMatch '^u3013\s');
foreach( $row in $dumpLines ){
    $cols = $row.Trim().Split($columnSeparators, 2, $noEmpty); # split into 2 columns
    if( $cols.Count -lt 2 ){ continue; }
    $k = foreach( $m in [regex]::Matches($cols[1], '(?i)u[0-9a-f]+') ){
        $m.Groups[0].Value;
    }
    $key = ' ' + ($k -join ' ') + ' ';
    if( !$Dic2.ContainsKey($key) ){ $Dic2.Add($key, $cols[0]); }
}

# Walk a.txt; a line is written to b.txt when some dump.txt entry contains all of its codes.
$fsw = New-Object System.IO.StreamWriter($file4, $false, [Text.Encoding]::UTF8);
try {
    foreach( $line in @([IO.File]::ReadAllLines($file3) -match '^CB') ){
        $inner = $line.Split($brackets)[1]; # characters between the brackets
        # Without characters every dump.txt entry would trivially "contain all codes".
        if( [string]::IsNullOrEmpty($inner) ){ continue; }
        $chs = @(foreach( $hit in [regex]::Matches($inner, $charPattern) ){ $hit.Value; });

        # Space padded codes of the line's characters, ready for whole-token search.
        $needles = New-Object Collections.ArrayList;
        foreach( $ch in $chs ){
            if( !$Dic1.ContainsKey($ch) ){ break; }
            [void]$needles.Add( ' ' + $Dic1[$ch] + ' ' );
        }
        if( $needles.Count -ne $chs.Count ){ continue; } # some character has no code

        $seen = @{}; # dump.txt first-column codes already collected
        # The list keeps the codes in dump.txt order; Hashtable enumeration order is arbitrary.
        $hits = New-Object 'Collections.Generic.List[string]';
        foreach( $key in $Dic2.Keys ){
            $code = $Dic2[$key];
            if( $seen.ContainsKey($code) ){ continue; }
            $flag = $true;
            foreach( $needle in $needles ){
                # Ordinal search: exact code units, independent of the current culture.
                if( $key.IndexOf($needle, [StringComparison]::Ordinal) -lt 0 ){
                    $flag = $false;
                    break;
                }
            }
            if( $flag ){
                $seen[$code] = $true;
                $hits.Add($code);
            }
        }
        if( $hits.Count -eq 0 ){ continue; }

        $s = $hits -join "`t";
        # Turn every code into "code-character".
        $s = [regex]::Replace($s, '\S+', {param($m); $m.Value + '-' + $Dic3[$m.Value]});
        echo( $line + "`t" + $s );
        $fsw.WriteLine( $line + "`t" + $s );
        $fsw.Flush(); # keep b.txt current while the script is still running
    }
} finally {
    # Release the file handle even when an error interrupts the loop.
    $fsw.Dispose();
}
echo 'Done';
[Console]::ReadKey();

复制代码