本帖最后由 CrLf 于 2014-4-14 10:33 编辑
再来个套用现成 vbs 的 LD 算法,准确度好像不亚于 4 楼算法:- Const input = "输入.txt"
- Const output = "输出.txt"
- Const isDebug = False
- Const length = 4
-
- t=Timer
-
- Set fso = CreateObject("Scripting.FileSystemObject")
- Set ts = fso.OpenTextFile(input,1)
-
- ar = Split(ts.ReadAll,vbCrLf)
-
- For i=UBound(ar) To 1 Step -1
- If Not test(ar,i) Then ar(i)=""
- Next
-
- Set ts = fso.CreateTextFile(output,1)
-
- For Each a In ar
- If Len(a) Then ts.WriteLine a
- Next
-
- If isDebug Then ts.WriteLine timer-t
-
- Function test(ar,i)
- Dim j,length
-
- For j=0 To i-1
- length = Len(ar(i))
- If Len(ar(j))>Len(ar(i)) Then length = Len(ar(j))
- If GetLevenshteinDistince(ar(i), ar(j)) > Sin(2/(Sqr(length)+5)) Then
- test = False
- Exit Function
- End If
- Next
- test = True
- End Function
-
- Function GetLevenshteinDistince(str1, str2) '函数引用自:http://bbs.bathome.net/thread-27991-1-1.html
- Dim x, y, A, B, C, K
- Dim l1,l2
- Dim Matrix()
- l1 = l2 = 0
- If Len(str2)>=length Then l1=Len(str2)-length+1
- If Len(str1)>=length Then l2=Len(str1)-length+1
-
- ReDim Matrix(l2, l1)
-
- '初始化第一行和第一列
- For x = 0 To UBound(Matrix, 1)
- Matrix(x, 0) = x
- Next
- For y = 0 To UBound(Matrix, 2)
- Matrix(0, y) = y
- Next
-
- '填充矩阵
- For x = 1 To UBound(Matrix, 1)
- For y = 1 To UBound(Matrix, 2)
- If (Mid(str1, Matrix(0, y), 4) = Mid(str2, Matrix(x, 0), 4)) Then
- C = Matrix(x -1 ,y - 1)
- Else
- C = Matrix(x -1 ,y - 1) + 1
- End If
-
- A = Matrix(x - 1, y) + 1
- B = Matrix(x, y - 1) + 1
-
- If (A =< B and A =< C) Then Matrix(x, y) = A
- If (B =< C and B =< A) Then Matrix(x, y) = B
- If (C =< A and C =< B) Then Matrix(x, y) = C
- Next
- Next
-
- '计算 LD 值
- If (Len(str1) > Len(str2)) Then
- K = Len(str1)-length+1
- Else
- K = Len(str2)-length+1
- End If
-
- GetLevenshteinDistince = 1 - (Matrix(l2, l1) / K)
- End Function
复制代码 函数引用自:http://bbs.bathome.net/thread-27991-1-1.html |