[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖
本帖最后由 WHY 于 2023-1-3 14:15 编辑

Test.ps1,右键使用PowerShell运行,增加 utf-7 编码识别
  function Get-Encoding($filePath){
      <#
      .SYNOPSIS
          Guesses the text encoding of a file and returns a System.Text.Encoding.
      .DESCRIPTION
          Detection order:
            1. Byte-order mark: UTF-32 LE/BE, UTF-16 LE/BE, UTF-8, UTF-7.
            2. No BOM: the bytes are scanned for sequences that cannot occur in
               UTF-8. If one is found the file is treated as GB2312 (ANSI),
               otherwise as UTF-8 without BOM (pure ASCII also ends up here).
      .PARAMETER filePath
          Path of the file to inspect. The whole file is read into memory.
      .OUTPUTS
          System.Text.Encoding
      #>

      # Alternatives that each describe one way a byte stream can violate UTF-8.
      $reg = @(
          # Lead byte followed by too few continuation bytes (0x80-0xBF).
          '[\xC0-\xDF](?:[^\x80-\xBF]|$)',
          '[\xE0-\xEF].{0,1}(?:[^\x80-\xBF]|$)',
          '[\xF0-\xF7].{0,2}(?:[^\x80-\xBF]|$)',
          '[\xF8-\xFB].{0,3}(?:[^\x80-\xBF]|$)',
          '[\xFC-\xFD].{0,4}(?:[^\x80-\xBF]|$)',
          '[\xFE-\xFE].{0,5}(?:[^\x80-\xBF]|$)',
          # Continuation byte where none is expected.
          '[\x00-\x7F][\x80-\xBF]',
          '[\xC0-\xDF].[\x80-\xBF]',
          '[\xE0-\xEF]..[\x80-\xBF]',
          '[\xF0-\xF7]...[\x80-\xBF]',
          '[\xF8-\xFB]....[\x80-\xBF]',
          '[\xFC-\xFD].....[\x80-\xBF]',
          '[\xFE-\xFE]......[\x80-\xBF]',
          # Continuation byte at the very start of the data.
          '^[\x80-\xBF]'
      ) -join '|'

      $byte = [IO.File]::ReadAllBytes($filePath)

      # Probe at most the bytes that exist so empty / tiny files are handled.
      $BOM = ''
      $probeLen = [Math]::Min(4, $byte.Length)
      if ($probeLen -gt 0) {
          $BOM = [BitConverter]::ToString($byte, 0, $probeLen)
      }

      # The UTF-32 checks must come first: the UTF-32LE BOM starts with the UTF-16LE BOM.
      if ($BOM -eq 'FF-FE-00-00') {
          return (New-Object System.Text.UTF32Encoding $false, $true)   # UTF-32LE with BOM
      }
      if ($BOM -eq '00-00-FE-FF') {
          return (New-Object System.Text.UTF32Encoding $true, $true)    # UTF-32BE with BOM
      }
      if ($BOM.StartsWith('FF-FE')) {
          return [Text.Encoding]::GetEncoding('UNICODE')                # UTF-16LE with BOM
      }
      if ($BOM.StartsWith('FE-FF')) {
          return [Text.Encoding]::BigEndianUnicode                      # UTF-16BE with BOM
      }
      if ($BOM.StartsWith('EF-BB-BF')) {
          return [Text.Encoding]::GetEncoding('UTF-8')                  # UTF-8 with BOM
      }
      if ($BOM.StartsWith('2B-2F-76')) {
          return [Text.Encoding]::GetEncoding('UTF-7')                  # UTF-7 with BOM
      }

      # No BOM. ISO-8859-1 maps every byte 0x00-0xFF to the char with the same
      # code, so the \xNN classes in the pattern are tested against raw bytes.
      $raw = [Text.Encoding]::GetEncoding(28591).GetString($byte)
      if ([regex]::Match($raw, $reg).Success) {
          return [Text.Encoding]::GetEncoding('GB2312')                 # ANSI
      }
      return [Text.Encoding]::GetEncoding('UTF-8')                      # UTF-8 without BOM
  }
  # Folder that contains this script, with a trailing backslash.
  $path = $MyInvocation.MyCommand.Path -replace '\\[^\\]*$', '\'
  # Converted copies are written into a "Result" subfolder next to the script.
  $dstFolder = $path + 'Result\'
  if (-not [IO.Directory]::Exists($dstFolder)) {
      $null = md $dstFolder
  }

  Get-ChildItem -LiteralPath $path -Filter *.txt | ForEach-Object {
      # Detect the source encoding, then read the file line by line with it.
      $enc = Get-Encoding $_.FullName
      $lines = [IO.File]::ReadAllLines($_.FullName, $enc)
      # Drop every line containing 'PS' + optional digits + ':'.
      $kept = $lines -notmatch 'PS[0-9]*:'
      # Save the remaining lines under the same name, encoded as GB2312 (ANSI).
      $target = $dstFolder + $_.Name
      [IO.File]::WriteAllLines($target, $kept, [Text.Encoding]::GetEncoding('GB2312'))
  }

  Write-Output 'Done'
  # Keep the console window open until the user presses Enter.
  [Console]::ReadLine()
复制代码
2

评分人数

TOP

我贴一个VBS
  ' Best-effort batch run: a failure on one file must not stop the remaining files.
  On Error Resume Next

  Dim fso, myDir, dstFolder
  Set fso = CreateObject("Scripting.FileSystemObject")

  ' Folder that contains this script.
  myDir = fso.GetFile(WScript.ScriptFullName).ParentFolder.Path
  ' Converted copies go into a "Result" subfolder, created on first use.
  dstFolder = myDir & "\Result"
  If Not fso.FolderExists(dstFolder) Then
      fso.CreateFolder dstFolder
  End If

  ' Convert every non-empty .txt file that sits next to the script.
  Dim objFile
  For Each objFile In fso.GetFolder(myDir).Files
      If LCase(Right(objFile.Name, 4)) = ".txt" Then
          If objFile.Size > 0 Then
              CheckEncoding objFile.Path, fso.BuildPath(dstFolder, objFile.Name)
          End If
      End If
  Next
  ' Removes every line that contains "PS" + optional digits + ":" (case-insensitive).
  '
  ' str     Text to filter. Passed ByRef; as a side effect its CRLF line breaks
  '         are normalised to LF in the caller's variable.
  ' Returns the surviving lines joined with CRLF, or "" when the input is empty
  '         or every line was removed.
  Function DeleteStr(ByRef str)
      Dim reg, arrIn, arrOut(), kept, i
      Set reg = New RegExp
      reg.IgnoreCase = True
      reg.Pattern = "PS[0-9]*:"

      ' Normalise CRLF to LF so a single Split handles both line-ending styles.
      str = Replace(str, vbCrLf, vbLf)
      arrIn = Split(str, vbLf)

      DeleteStr = ""
      ' Split("") yields an array with UBound -1: nothing to filter.
      If UBound(arrIn) < 0 Then Exit Function

      ' Size the output once; growing it per kept line would copy the array each time.
      ReDim arrOut(UBound(arrIn))
      kept = 0
      For i = 0 To UBound(arrIn)
          If Not reg.Test(arrIn(i)) Then
              arrOut(kept) = arrIn(i)
              kept = kept + 1
          End If
      Next

      ' Every line matched: return "" instead of joining an unused array.
      If kept = 0 Then Exit Function

      ReDim Preserve arrOut(kept - 1)
      DeleteStr = Join(arrOut, vbCrLf)
  End Function
  ' Converts a UTF-32 file to ANSI in two steps: the file is first rewritten as
  ' UTF-16 little-endian into dstFile, then SaveFileUtf16ToAnsi converts that
  ' intermediate file in place.
  '
  ' srcFile  Path of the UTF-32 source file (its BOM is converted like any other unit).
  ' dstFile  Path of the output file; overwritten.
  ' encName  "UTF32LE" or "UTF32BE" - byte order of srcFile.
  Function ConvertUtf32ToUtf16(srcFile, dstFile, encName)
      ' An MSXML "bin.hex" node turns a hex string into a byte array, which is
      ' what ADODB.Stream.Write needs.
      Dim xmlDoc, node
      Set xmlDoc = CreateObject("MSXML2.DOMDocument")
      Set node = xmlDoc.CreateElement("binary")
      node.DataType = "bin.hex"

      Dim ado, sz, unitCount, i, j, b(3), arr()
      Set ado = CreateObject("ADODB.Stream")
      ado.Type = 1            ' binary
      ado.Open
      ado.LoadFromFile srcFile
      sz = ado.Size

      ' Only whole 4-byte units are converted; a trailing partial unit is ignored.
      unitCount = sz \ 4
      If unitCount = 0 Then
          ado.Close
          Exit Function
      End If

      ReDim arr(unitCount - 1)
      For i = 0 To unitCount - 1
          For j = 0 To 3
              b(j) = CLng(AscB(ado.Read(1)))
          Next
          ' Pass the bytes most-significant first, whatever the source byte order.
          If encName = "UTF32LE" Then
              arr(i) = Utf32UnitToUtf16LeHex(b(3), b(2), b(1), b(0))
          ElseIf encName = "UTF32BE" Then
              arr(i) = Utf32UnitToUtf16LeHex(b(0), b(1), b(2), b(3))
          End If
      Next

      ' Overwrite the loaded stream with the UTF-16LE bytes and cut off the rest.
      node.Text = Join(arr, "")
      ado.Position = 0
      ado.Write node.NodeTypedValue
      ado.SetEOS
      ado.SaveToFile dstFile, 2   ' 2 = overwrite existing file
      ado.Close

      SaveFileUtf16ToAnsi dstFile, dstFile
  End Function

  ' Returns the hex text of one code point encoded as UTF-16 little-endian.
  ' b3..b0 are the four bytes of the UTF-32 unit, most significant first.
  ' Values above U+10FFFF become U+FFFD; values above U+FFFF become a surrogate pair.
  Function Utf32UnitToUtf16LeHex(b3, b2, b1, b0)
      Dim cp, v
      If b3 <> 0 Or b2 > 16 Then
          cp = 65533                              ' U+FFFD replacement character
      Else
          cp = b2 * 65536 + b1 * 256 + b0
      End If

      If cp < 65536 Then
          Utf32UnitToUtf16LeHex = Utf16UnitLeHex(cp)
      Else
          v = cp - 65536
          ' 55296 = &HD800 (high surrogate base), 56320 = &HDC00 (low surrogate base).
          ' Decimal is used because &HD800 would be read as a negative Integer.
          Utf32UnitToUtf16LeHex = Utf16UnitLeHex(55296 + (v \ 1024)) & Utf16UnitLeHex(56320 + (v Mod 1024))
      End If
  End Function

  ' Returns a 16-bit value as four hex digits, low byte first.
  Function Utf16UnitLeHex(unit)
      Utf16UnitLeHex = Right("00" & Hex(unit Mod 256), 2) & Right("00" & Hex(unit \ 256), 2)
  End Function
  ' Reads srcFile as Unicode text, filters it through DeleteStr and writes the
  ' result to dstFile using the FileSystemObject default (ANSI) format.
  ' Relies on the script-level FileSystemObject "fso".
  Function SaveFileUtf16ToAnsi(srcFile, dstFile)
      Dim reader, writer, filtered
      ' 1 = ForReading, -1 = open as Unicode.
      Set reader = fso.OpenTextFile(srcFile, 1, True, -1)
      filtered = DeleteStr(reader.ReadAll)
      reader.Close

      ' 2 = ForWriting, True = create the file when it does not exist.
      Set writer = fso.OpenTextFile(dstFile, 2, True)
      writer.Write filtered
      writer.Close
  End Function
  ' Decodes srcFile with the given ADODB charset, filters the text through
  ' DeleteStr and saves it to dstFile encoded as GB2312.
  '
  ' charset  Charset name handed to ADODB.Stream; the callers in this script
  '          pass "UTF-8" or "UTF-7".
  Function SaveFileUtf8ToAnsi(srcFile, dstFile, charset)
      Dim stream, content
      Set stream = CreateObject("ADODB.Stream")
      With stream
          .Type = 2               ' text
          .CharSet = charset
          .Open
          .LoadFromFile srcFile
          content = .ReadText(-1) ' -1 = read everything
          ' Rewind and reuse the same stream for output, now encoding as GB2312.
          .Position = 0
          .CharSet = "GB2312"
          .WriteText DeleteStr(content)
          .SetEOS                 ' drop whatever remains of the old content
          .SaveToFile dstFile, 2  ' 2 = overwrite existing file
          .Close
      End With
  End Function
  ' Reads srcFile with the FileSystemObject default (ANSI) format, filters it
  ' through DeleteStr and writes the result to dstFile in the same format.
  ' Relies on the script-level FileSystemObject "fso".
  Function SaveFileAnsiToAnsi(srcFile, dstFile)
      Dim reader, writer, filtered
      ' 1 = ForReading.
      Set reader = fso.OpenTextFile(srcFile, 1, True)
      filtered = DeleteStr(reader.ReadAll)
      reader.Close

      ' 2 = ForWriting, True = create the file when it does not exist.
      Set writer = fso.OpenTextFile(dstFile, 2, True)
      writer.Write filtered
      writer.Close
  End Function
  ' Detects the encoding of srcFile and calls the matching converter so that
  ' dstFile receives the filtered ANSI version.
  '
  ' Detection order: BOM (UTF-32 LE/BE, UTF-16, UTF-8, UTF-7); without a BOM the
  ' bytes are checked by isUTF8 and the file is treated as UTF-8 or ANSI.
  ' An empty file is skipped.
  Function CheckEncoding(srcFile, dstFile)
      Dim ado, i, BOM, sz, probeLen, arr(), kind
      Set ado = CreateObject("ADODB.Stream")
      ado.Type = 1            ' binary
      ado.Open
      ado.LoadFromFile srcFile
      sz = ado.Size

      If sz = 0 Then
          ado.Close
          Exit Function
      End If

      ' Read at most the bytes that exist: Read(1) past the end returns Null,
      ' which would make AscB fail for files shorter than 4 bytes.
      probeLen = 4
      If sz < probeLen Then probeLen = sz
      BOM = ""
      For i = 1 To probeLen
          BOM = BOM & Right("00" & Hex(AscB(ado.Read(1))), 2)
      Next

      ' The UTF-32 checks come first: the UTF-32LE BOM starts with the UTF-16LE BOM.
      If BOM = "FFFE0000" Then
          kind = "UTF32LE"
      ElseIf BOM = "0000FEFF" Then
          kind = "UTF32BE"
      ElseIf Left(BOM, 4) = "FFFE" Or Left(BOM, 4) = "FEFF" Then
          ' NOTE(review): big-endian files (FEFF) take the same path as
          ' little-endian ones - confirm OpenTextFile's Unicode mode reads them.
          kind = "UTF16"
      ElseIf Left(BOM, 6) = "EFBBBF" Then
          kind = "UTF-8"
      ElseIf Left(BOM, 6) = "2B2F76" Then
          kind = "UTF-7"
      Else
          ' No BOM: turn every byte into the character with the same code and
          ' let isUTF8 look for byte sequences that are not valid UTF-8.
          ado.Position = 0
          ReDim arr(sz - 1)
          For i = 1 To sz
              arr(i - 1) = ChrW(AscB(ado.Read(1)))
          Next
          If isUTF8(arr) Then
              kind = "UTF-8"
          Else
              kind = "ANSI"
          End If
      End If

      ' Release the source before a converter reopens it.
      ado.Close

      Select Case kind
          Case "UTF32LE", "UTF32BE"
              ConvertUtf32ToUtf16 srcFile, dstFile, kind
          Case "UTF16"
              SaveFileUtf16ToAnsi srcFile, dstFile
          Case "UTF-8", "UTF-7"
              SaveFileUtf8ToAnsi srcFile, dstFile, kind
          Case Else
              SaveFileAnsiToAnsi srcFile, dstFile
      End Select
  End Function
  ' Returns True when the data contains no byte sequence that is invalid in UTF-8.
  '
  ' arr  Array of one-character strings, one per byte, each character carrying
  '      the code of its byte (0-255).
  Function isUTF8(ByRef arr)
      Dim badForms, matcher
      ' Each alternative is one way to break UTF-8: the first six are a lead byte
      ' with too few continuation bytes (0x80-0xBF), the next seven a continuation
      ' byte where none is expected, the last a continuation byte at the start.
      badForms = Array( _
          "[\xC0-\xDF](?:[^\x80-\xBF]|$)", _
          "[\xE0-\xEF].{0,1}(?:[^\x80-\xBF]|$)", _
          "[\xF0-\xF7].{0,2}(?:[^\x80-\xBF]|$)", _
          "[\xF8-\xFB].{0,3}(?:[^\x80-\xBF]|$)", _
          "[\xFC-\xFD].{0,4}(?:[^\x80-\xBF]|$)", _
          "[\xFE-\xFE].{0,5}(?:[^\x80-\xBF]|$)", _
          "[\x00-\x7F][\x80-\xBF]", _
          "[\xC0-\xDF].[\x80-\xBF]", _
          "[\xE0-\xEF]..[\x80-\xBF]", _
          "[\xF0-\xF7]...[\x80-\xBF]", _
          "[\xF8-\xFB]....[\x80-\xBF]", _
          "[\xFC-\xFD].....[\x80-\xBF]", _
          "[\xFE-\xFE]......[\x80-\xBF]", _
          "^[\x80-\xBF]")

      Set matcher = New RegExp
      matcher.Pattern = Join(badForms, "|")
      ' Valid UTF-8 means none of the invalid forms occurs anywhere in the data.
      isUTF8 = Not matcher.Test(Join(arr, ""))
  End Function
  153. MsgBox "Done"    ' Completion notice; shown even if some files failed, because the script runs under On Error Resume Next
复制代码
1

评分人数

TOP

返回列表