本帖最后由 yu2n 于 2014-4-6 19:26 编辑
VBS 使用OpenCC词库实现词汇级别的繁简转换(By Yu2n)
示例:词汇级别简转繁
输入:
中华人民共和国是工人阶级领导的、以工农联盟为基础的人民****的社会主义国家。
干活 干杯 西太后 后天
划过来
输出:
中華人民共和國是工人階級領導的、以工農聯盟爲基礎的人民**專政的社會主義国家。
幹活 乾杯 西太后 後天
划過來
VBS / JS / Javascript 使用OpenCC词库实现词汇级别的繁简转换(By Yu2n)
源码下载:http://yu2n.sinaapp.com/file/?dir=/file/tools/OpenCC/
- Dim dictionary_path, TSCharacters, TSPhrases, STCharacters, STPhrases
- dictionary_path = Left(WScript.ScriptFullName, Len(WScript.ScriptFullName) - Len(WScript.ScriptName))
- TSCharacters = ReadText(dictionary_path + "TSCharacters.txt")
- TSPhrases = ReadText(dictionary_path + "TSPhrases.txt")
- STCharacters = ReadText(dictionary_path + "STCharacters.txt")
- STPhrases = ReadText(dictionary_path + "STPhrases.txt")
-
- ' 测试
- Call Test()
- Function Test()
- Dim str1, str2
- str1 = "中华人民共和国是工人阶级领导的、以工农联盟为基础的人民****的社会主义国家。"
- str1 = str1 & vbCrLf & "干活 干杯 西太后 后天"
- str1 = str1 & vbCrLf & "划过来"
- str2 = "中華人民共和國是工人階級領導的、以工農聯盟爲基礎的人民**專政的社會主義国家。"
- str2 = str2 & vbCrLf & "幹活 乾杯 西太后 後天"
- str2 = str2 & vbCrLf & "划過來"
-
- WScript.Echo Now() & vbTab & " TC2SC() "
- WScript.Echo Now() & VbCrLf & str1 & vbCrLf & " ==> "
- WScript.Echo Now() & VbCrLf & SC2TC(str1) & vbCrLf & vbCrLf
-
- WScript.Echo Now() & vbTab & " TC2SC() "
- WScript.Echo Now() & VbCrLf & str2 & vbCrLf & " ==> "
- WScript.Echo Now() & VbCrLf & TC2SC(str2)
- End Function
-
-
- ' 载入词库是否完成
- Function isInitDic()
- isInit = Not (STCharacters="" Or STPhrases="" Or TSCharacters="" Or TSPhrases="")
- If Not isInit Then WScript.Echo("正在载入词库,请稍等……")
- isInitDic = isInit
- End Function
-
- ' 简转繁
- Function SC2TC(str)
- If (isInitDic()) Then
- SC2TC = TCSCConverter(STPhrases & vbLf & STCharacters, str)
- Else
- SC2TC = ""
- End If
- End Function
-
- ' 繁转简
- Function TC2SC(str)
- If (isInitDic()) Then
- TC2SC = TCSCConverter(TSPhrases & vbLf & TSCharacters, str)
- Else
- TC2SC = ""
- End If
- End Function
-
- ' 使用 OpenCC 词库转换
- Function TCSCConverter(strDictionaryOpenCC, strSrc)
- ' 词组库排序替换:按字符串长度降序排序
- arr_Phrases = Split(strDictionaryOpenCC, vbLf)
- Call OpenCC_Dic_Sort(arr_Phrases) ' 将数组按字符串长度降序排序
-
- ' 词组替换
- Dim i, arr_find()
- ReDim Preserve arr_find(0)
- For i = 0 To UBound(arr_Phrases)
- If (InStr(arr_Phrases(i), vbTab) > 1) And (Len(arr_Phrases(i)) >=3) Then
- Dim str_SrcPhrases, str_DesPhrases
- str_SrcPhrases = Split(arr_Phrases(i), vbTab)(0)
- str_DesPhrases = Split(arr_Phrases(i), vbTab)(1)
- If (InStr(strSrc, str_SrcPhrases) > 0) And (str_SrcPhrases <> "" ) Then
- ReDim Preserve arr_find(UBound(arr_find)+1)
- If (InStr(str_DesPhrases, " ") > 1) And (Len(str_DesPhrases) >= 3) Then
- arr_find(UBound(arr_find)) = "<[?" & UBound(arr_find) & "?]>" & vbTab & Split(str_DesPhrases," ")(0)
- Else
- arr_find(UBound(arr_find)) = "<[?" & UBound(arr_find) & "?]>" & vbTab & str_DesPhrases
- End If
- strSrc = Replace(strSrc, str_SrcPhrases, "<[?" & UBound(arr_find) & "?]>") ' 增加替换标记
- End If
- End If
- Next
-
- ' 还原替换标记
- For i = 0 To UBound(arr_find)
- If (InStr(arr_find(i), vbTab) > 1) And (Len(arr_find(i)) >=8) Then
- 'WScript.Echo Split(arr_find(i), vbTab)(0) & vbTab & Split(arr_find(i), vbTab)(1)
-
- If (InStr(strSrc, Split(arr_find(i), vbTab)(0)) > 0) Then
- If Split(arr_find(i), vbTab)(1) <> "" Then
- strSrc = Replace(strSrc, Split(arr_find(i), vbTab)(0), Split(arr_find(i), vbTab)(1)) ' 还原替换标记
- End If
- End If
-
- End If
- Next
-
- TCSCConverter = strSrc
- End Function
-
- ' 词组库排序替换:按字符串长度降序排序
- Function OpenCC_Dic_Sort(ByRef arr)
- Dim arrTable()
- ReDim Preserve arrTable(UBound(arr),1)
- Dim intRow
- For intRow = 0 To UBound(arr)
- arrTable(intRow,0) = Len(arr(intRow))
- arrTable(intRow,1) = arr(intRow)
- Next
- Call arrSort(arrTable, 0)
- For intRow = 0 To UBound(arr)
- arr(intRow) = arrTable(intRow,1)
- Next
- End Function
- ' 二维数组排序:14W纪录级别
- 'arr --待排序二维数组
- 'intSortField --待排序字段索引
- Function arrSort(ByRef arr, ByVal intSortField)
- Const adOpenStatic = 3
- Const adUseClient = 3
- Const adDouble = 5
- Const adVarChar = 200
- Const adLongVarWChar = 203 ' DataTypeEnum: adLongVarWChar -- Indicates a long null-terminated Unicode string value.
- Set rs = CreateObject("ADODB.Recordset")
- rs.CursorLocation = adUseClient
-
- Dim intRow, intCol
- '给记录集添加字段,以前缀+索引的形式
- For intCol = 0 To UBound(arr, 2)
- If intCol = intSortField Then
- rs.Fields.append intCol, adDouble ' 数字
- Else
- rs.Fields.append intCol, adLongVarWChar, 1024
- End If
- Next
- rs.CursorType = adOpenStatic
- rs.Open
-
- '将数组插入进记录中
- For intRow = 0 To UBound(arr, 1)
- rs.AddNew
- For intCol = 0 To UBound(arr, 2)
- If intCol = intSortField Then
- rs(intCol) = CDBl(arr(intRow, intCol))
- Else
- rs(intCol) = arr(intRow, intCol)
- End If
- Next
- rs.Update
- Next
-
- '设置排序字段
- rs.Sort = rs(intSortField).Name & " DESC"
- rs.MoveFirst
- '将排好序的数据重新赋给数组
- For intRow = 0 To UBound(arr, 1)
- For intCol = 0 To UBound(arr, 2)
- arr(intRow, intCol) = rs(intCol)
- Next
- rs.MoveNext
- Next
- End Function
-
-
- ' 使用 utf-8 编码读写文本文件
- Function ReadText(FileName)
- ReadText = Pfile(FileName, "utf-8", "ForReading", "")
- End Function
- Function SaveText(FileName, TextString)
- SaveText = Pfile(FileName, "utf-8", "ForWriting", TextString)
- End Function
- Function SaveWSH(FileName, TextString)
- SaveWSH = Pfile(FileName, "Unicode", "ForWriting", TextString)
- End Function
- Function LogText(FileName, TextString)
- LogText = Pfile(FileName, "utf-8", "ForAppending", TextString)
- End Function
- Function Pfile(FileName, FileCode, strType, TextString)
- Set fso = CreateObject("Scripting.FileSystemObject")
- Set objStream = CreateObject("ADODB.Stream")
- objStream.Type = 2
- objStream.Mode = 3
- objStream.Charset = FileCode '不同编码时自己换,Chinese (Simplified) (GB2312),中文 GBK ,繁体中文 Big5 ,日文 EUC-JP ,韩文 EUC-KR,charset=UTF-8(国际化编码),ANSI,Unicode,unicode big endian
- If (fso.FileExists(FileName)) Then
- objStream.Open()
- objStream.LoadFromFile FileName
- if (strType = "ForReading") Then TextString = objStream.ReadText()
- if (strType = "ForAppending") Then TextString = TextString & objStream.ReadText()
- objStream.Close()
- End If
- If (strType = "ForWriting") Or (strType ="ForAppending") Then
- objStream.Open()
- objStream.WriteText TextString
- objStream.SaveToFile FileName, 2
- objStream.Close()
- End If
- Set objStream = Nothing
- If (strType = "ForReading") Then Pfile = TextString
- If (strType = "ForWriting") Or (strType = "ForAppending") Then Pfile = True
- End Function
复制代码
|