- ' Re-encode every UTF-8 .txt file found in folder "." as GB2312, in place.
- ' A file is treated as UTF-8 when it starts with a UTF-8 BOM, or, failing that,
- ' when its raw bytes pass the is_valid_utf8 heuristic.
- aCode = "UTF-8"     ' charset used when reading the file
- bCode = "GB2312"    ' charset used when writing the file back
- Set FSO = CreateObject("Scripting.FileSystemObject")
- For Each FileUrl In FSO.GetFolder(".").Files
-     Ext = FSO.GetExtensionName(FileUrl)
-     If LCase(Ext) = "txt" Then
-         ' VBScript's Or always evaluates both operands, so run the cheap BOM
-         ' check first and only fall back to the byte-by-byte scan of the
-         ' whole file when no BOM was found.
-         IsUtf8 = CheckCode(FileUrl)
-         If Not IsUtf8 Then IsUtf8 = is_valid_utf8(read(FileUrl))
-         If IsUtf8 Then
-             Call WriteToFile(FileUrl, ReadFile(FileUrl, aCode), bCode)
-         End If
-     End If
- Next
- MsgBox "OK"
-
- ' Load the file at FileUrl through an ADODB text stream and return its whole
- ' content as a string, decoding the bytes with the charset named by CharSet.
- Function ReadFile(FileUrl, CharSet)
-     Dim reader, content
-     Set reader = CreateObject("Adodb.Stream")
-     With reader
-         .Type = 2               ' text stream
-         .Mode = 3               ' read/write
-         .Charset = CharSet
-         .Open
-         .LoadFromFile FileUrl
-         content = .ReadText     ' no size argument: read everything
-         .Close
-     End With
-     Set reader = Nothing
-     ReadFile = content
- End Function
-
- ' Write the string Str to the file at FileUrl through an ADODB text stream,
- ' encoding it with the charset named by CharSet. An existing file is
- ' overwritten (SaveToFile option 2). No value is returned.
- Function WriteToFile (FileUrl, Str, CharSet)
-     Dim writer
-     Set writer = CreateObject("Adodb.Stream")
-     With writer
-         .Type = 2               ' text stream
-         .Mode = 3               ' read/write
-         .Charset = CharSet
-         .Open
-         .WriteText Str
-         .SaveToFile FileUrl, 2  ' 2 = create or overwrite
-         .Flush
-         .Close
-     End With
-     Set writer = Nothing
- End Function
-
- ' Report whether the file at FileUrl begins with the UTF-8 byte order mark
- ' (bytes EF BB BF).
- ' Returns True when the BOM is present, False otherwise. Files shorter than
- ' three bytes (including empty files) cannot hold a BOM and yield False
- ' instead of raising a runtime error.
- Function CheckCode (FileUrl)
-     Dim slz, Bin
-     CheckCode = False
-     Set slz = CreateObject("Adodb.Stream")
-     slz.Type = 1    ' binary stream
-     slz.Mode = 3    ' read/write
-     slz.Open
-     slz.LoadFromFile FileUrl
-     ' Read returns Null on an empty stream and fewer bytes than requested on
-     ' a short one, so only inspect the header when three bytes are available.
-     If slz.Size >= 3 Then
-         Bin = slz.Read(3)
-         If AscB(MidB(Bin, 1, 1)) = &HEF And AscB(MidB(Bin, 2, 1)) = &HBB And AscB(MidB(Bin, 3, 1)) = &HBF Then
-             CheckCode = True
-         End If
-     End If
-     slz.Close
-     Set slz = Nothing
- End Function
-
- ' Read the file at path as raw bytes and return them as a string in which
- ' each character's code equals the value (0-255) of the corresponding byte.
- ' An empty file yields an empty string.
- Function read(path)
-     Dim ado, a(), i, n
-     Set ado = CreateObject("ADODB.Stream")
-     ado.Type = 1 : ado.Open     ' binary stream
-     ado.LoadFromFile path
-     n = ado.Size - 1
-     If n < 0 Then
-         read = ""
-     Else
-         ReDim a(n)
-         For i = 0 To n
-             ' Read(1) yields a one-byte array; map that byte to a character.
-             a(i) = ChrW(AscB(ado.Read(1)))
-         Next
-         read = Join(a, "")
-     End If
-     ' Release the stream; it was previously left open.
-     ado.Close
-     Set ado = Nothing
- End Function
-
- ' Heuristically decide whether a byte string is well-formed UTF-8.
- ' input: a string holding one character per byte (character code = byte
- '        value), as produced by read().
- ' Returns True when no malformed sequence is found, False otherwise.
- ' The regular expression lists the malformed shapes (a lead byte with too few
- ' or too many continuation bytes, a stray continuation byte, a byte that can
- ' never occur in UTF-8); the input is valid when none of them matches.
- Function is_valid_utf8(ByRef input) 'ByRef to improve efficiency
-     Dim s, re
-     Set re = New Regexp
-     ' Lead byte followed by too few continuation bytes.
-     s = "[\xC0-\xDF]([^\x80-\xBF]|$)"
-     s = s & "|[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)"
-     s = s & "|[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)"
-     s = s & "|[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)"
-     s = s & "|[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)"
-     s = s & "|[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)"
-     ' Continuation byte where none is expected.
-     s = s & "|[\x00-\x7F][\x80-\xBF]"
-     s = s & "|[\xC0-\xDF].[\x80-\xBF]"
-     s = s & "|[\xE0-\xEF]..[\x80-\xBF]"
-     s = s & "|[\xF0-\xF7]...[\x80-\xBF]"
-     s = s & "|[\xF8-\xFB]....[\x80-\xBF]"
-     s = s & "|[\xFC-\xFD].....[\x80-\xBF]"
-     s = s & "|[\xFE-\xFE]......[\x80-\xBF]"
-     s = s & "|^[\x80-\xBF]"
-     ' Bytes C0, C1 and F5-FF never appear in UTF-8 (RFC 3629); without this
-     ' alternative a lone 0xFF was accepted as valid.
-     s = s & "|[\xC0\xC1\xF5-\xFF]"
-     re.Pattern = s
-     is_valid_utf8 = (Not re.Test(input))
- End Function
无BOM头的UTF-8文件的判断方法来自:
http://demon.tw/programming/vbs-validate-utf8.html