See also: xNM Name Matching Functions, WinBatch Extender: xnm34i.v34002.zip
udfSoundex (sString) udfSoundexEx (sString)
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfsoundex",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfsoundex
#DefineFunction udfSoundex (sString)
If !StrLen(sString) Then Return ("")
sString = StrLower(sString)
; Retain the first letter of the name.
sCharFirst = StrSub(sString,1,1)
; special pre-processing for german language
sString = StrReplace(sString, "sch", "s") ; german special "sch"
sString = StrReplace(sString, "ß", "s") ; german special sharp-s "ß"
; Drop all occurrences of a, e, h, i, o, u, w, y.
sString = StrClean(sString,"bcdfgjklmnpqrstvxz","",@FALSE,2)
iStringLength = StrLen(sString)
If !iStringLength Then Goto label
; Shrink doubled chars to single char. See Note at end of function.
sStringTemp = ""
sCharTemp = sCharFirst
For iS=1 To iStringLength
sChar = StrSub(sString,iS,1)
If (sChar<>sCharTemp)
sStringTemp = StrCat(sStringTemp,sChar)
sCharTemp = sChar
EndIf
Next
sString = sStringTemp
; do the coding
sString = StrClean(sString, "bfpv" , "1", @TRUE, 1)
sString = StrClean(sString, "czsgjkqx", "2", @TRUE, 1)
sString = StrClean(sString, "dt" , "3", @TRUE, 1)
sString = StrClean(sString, "l" , "4", @TRUE, 1)
sString = StrClean(sString, "nm" , "5", @TRUE, 1)
sString = StrClean(sString, "r" , "6", @TRUE, 1)
:label
sSoundex = StrFix(StrCat(StrUpper(sCharFirst),sString),"0",4)
Return (sSoundex)
;..........................................................................................................................................
; Soundex manipulation based on
; U.S. Patents 1261167 (1918), 1435663 (1922)
; by Margaret K. Odell and Robert C. Russel
; as used by the National Archives and Records Administration (NARA)
; (published by Don Knuth [Knuth]).
;
; Russel's method is usable for names from england, america, western europe countries,
; but does not apply well to many Slavic and Yiddish surnames
; and is not independent of several ethnic considerations.
;
; With soundex, the "sound" of names - the phonetic sound to be exact, is coded.
; This is of great help since it avoids most problems of misspellings or alternate spellings.
; For example Scherman, Schurman, Sherman and Shireman and Shurman are indexed together as "S655".
; Surname soundex indexing is not alphabetical, but is listed by the letter-and-number code.
; If several surnames have the same code, their cards are arranged alphabetically by given name.
; Example: S655 Arthur, S655 Betsy, S655 Charles.
;
; Russell Soundex Name-Matching
; The Russell Soundex Code algorithm is designed primarily for use with English names and is a
; phonetically based name matching method. The algorithm converts each name to a four-character code,
; which can be used to identify equivalent names, and is structured as follows [Knuth]:
; 1. Retain the first letter of the name, and drop all occurrences of a, e, h, i, o, u, w, y in other positions.
; 2. Assign the following numbers to the remaining letters after the first:
; b, f, p, v == 1
; c, g, j, k, q, s, x, z == 2
; d, t == 3
; l == 4
; m, n == 5
; r == 6
; 3. If two or more letters with the same code were adjacent in the original name (before step 1) (!!!),
; omit all but the first.
; 4. Convert to the form ‘letter, digit, digit, digit’ by adding trailing zeros
; (if there are less than three digits), or by dropping rightmost digits it there are more than three.
;
; For example, the names Euler, Gauss, Hilbert, Knuth and Lloyd are given the respective codes
; E460, G200, H416, K530, L300.
; However, the algorithm also gives the same codes for
; Ellery, Ghosh, Heilbronn, Kant and Ladd [Knuth] which are not related in reality.
;
; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392.
;..........................................................................................................................................
; Detlev Dalitz.20020727
;..........................................................................................................................................
#EndFunction
:skip_udfsoundex
;------------------------------------------------------------------------------------------------------------------------------------------
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfsoundexex",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfsoundexex
#DefineFunction udfSoundexEx (sString)
If !StrLen(sString) Then Return ("")
sString = StrLower(sString)
; Retain the first letter of the name.
sCharFirst = StrSub(sString,1,1)
; special pre-processing for german language
sString = StrReplace(sString, "sch", "s") ; german special "sch"
sString = StrReplace(sString, "ß", "s") ; german special sharp-s "ß"
; Drop all occurrences of a, e, h, i, o, u, w, y.
sString = StrClean(sString,"bcdfgjklmnpqrstvxz","",@FALSE,2)
iStringLength = StrLen(sString)
If !iStringLength Then Goto label
; Shrink doubled chars to single char. See Note at end of function.
sStringTemp = ""
sCharTemp = sCharFirst
For iS=1 To iStringLength
sChar = StrSub(sString,iS,1)
If (sChar<>sCharTemp)
sStringTemp = StrCat(sStringTemp,sChar)
sCharTemp = sChar
EndIf
Next
sString = sStringTemp
; do the coding
sString = StrClean(sString, "bp" , "1", @TRUE, 1)
sString = StrClean(sString, "fv" , "2", @TRUE, 1)
sString = StrClean(sString, "cks", "3", @TRUE, 1)
sString = StrClean(sString, "gj" , "4", @TRUE, 1)
sString = StrClean(sString, "qxz", "5", @TRUE, 1)
sString = StrClean(sString, "dt" , "6", @TRUE, 1)
sString = StrClean(sString, "l" , "7", @TRUE, 1)
sString = StrClean(sString, "mn" , "8", @TRUE, 1)
sString = StrClean(sString, "r" , "9", @TRUE, 1)
:label
sSoundex = StrFix(StrCat(StrUpper(sCharFirst),sString),"0",5)
Return (sSoundex)
;..........................................................................................................................................
; The "refined soundex" or "extended soundex".
; SoundexEx manipulation based on
; U.S. Patents 1261167 (1918), 1435663 (1922)
; by Margaret K. Odell and Robert C. Russel
; (published by Don Knuth [Knuth]).
; Russel's method is usable for names from england, america, western europe countries.
; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392.;
; See detailed annotations at end of User Defined Function "udfSoundEx".
;..........................................................................................................................................
; Detlev Dalitz.20020727
;..........................................................................................................................................
#EndFunction
:skip_udfsoundexex
;------------------------------------------------------------------------------------------------------------------------------------------
;--- test ---
; Including test of Name Matching Extender
AddExtender("XNM34I.DLL")
namelist = ""
namelist = StrCat(namelist,"123 test,24/7 test,ZYX test")
namelist = StrCat(namelist,",A,Lee,Kuhne,Meyer-Lansky,Oepping,Daley,Dalitz,Duhlitz,De Ledes")
namelist = StrCat(namelist,",Schüßler,Schmidt,Sandemann,Sinatra,Heinrich,Hammerschlag")
namelist = StrCat(namelist,",Williams,Wilms,Wilson,Zedlitz,Zotteldecke")
namelist = StrCat(namelist,",Scherman,Schurman,Sherman,Shireman,Shurman") ; indexed together as "S655" resp. "S9880"
namelist = StrCat(namelist,",Euler,Ellery") ; Euler, Ellery -> E460
namelist = StrCat(namelist,",Hilbert,Heilbronn") ; Hilbert, Heilbronn -> H416
namelist = StrCat(namelist,",Gauss,Ghosh") ; Gauss, Ghosh -> G200
namelist = StrCat(namelist,",Knuth,Kant") ; Knuth, Kant -> K530
namelist = StrCat(namelist,",Lloyd,Ladd") ; Lloyd, Ladd -> L300
namelist = StrCat(namelist,",Lukasiewicz,Lissajous") ; Lukasiewicz, Lissajous -> L222
namelist = StrCat(namelist,",Ashcroft") ; Ashcroft -> A226 ; The US Census uses A261 as the Soundex coding for ASHCROFT.
namelist = StrCat(namelist,",Czarkowska") ; Czarkowska -> C262
namelist = StrCat(namelist,",Hornblower") ; Hornblower -> H651
namelist = StrCat(namelist,",Looser") ; Looser -> L260
namecount = ItemCount(namelist,",")
:test1
outstr = ""
For i=1 To namecount
nameitem = ItemExtract(i,namelist,",")
outstr = StrCat(outstr,udfSoundex(nameitem)," = ",nameitem,@LF)
; The "XNM34I.DLL" extender runs faster.
outstr = StrCat(outstr,nmSoundex(nameitem,0)," = ",nameitem,@LF)
Next
IntControl(63,200,100,800,900)
IntControl(28,1,0,0,0)
AskItemlist("Demo udfSoundex (sString)",outstr,@LF,@UNSORTED,@SINGLE)
:test2
outstr = ""
For i=1 To namecount
nameitem = ItemExtract(i,namelist,",")
outstr = StrCat(outstr,udfSoundexEx(nameitem)," = ",nameitem,@LF)
; The "XNM34I.DLL" extender runs faster.
outstr = StrCat(outstr,nmSoundex(nameitem,1)," = ",nameitem,@LF)
Next
IntControl(63,200,100,800,900)
IntControl(28,1,0,0,0)
AskItemlist("Demo udfSoundexEx (sString)",outstr,@LF,@UNSORTED,@SINGLE)
:CANCEL
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;*EOF*
udfMetaphone (sString, iLength)
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfmetaphone",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfmetaphone
#DefineFunction udfMetaphone (sString, iCodeLength)
If (sString=="") Then Return ("")
; Delete non-alpha characters from input string.
sString = StrClean(sString,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","",@FALSE,2)
If (sString=="") Then Return ("")
; Make all characters uppercase.
sString = StrUpper(sString)
; Set minimal default codelength to 4 Byte
iCodeLength = Max(4,iCodeLength)
sVowels = "AEIOU"
iStrLen = StrLen(sString)
Select 1
Case (StrSub(sString,1,2)=='KN')
sString = StrSub(sString,2,-1)
Break
Case (StrSub(sString,1,2)=='GN')
sString = StrSub(sString,2,-1)
Break
Case (StrSub(sString,1,2)=='PN')
sString = StrSub(sString,2,-1)
Break
Case (StrSub(sString,1,2)=='AE')
sString = StrSub(sString,2,-1)
Break
Case (StrSub(sString,1,2)=='WR')
sString = StrSub(sString,2,-1)
Break
Case (StrSub(sString,1,1)=='X')
sString = StrCat('S',StrSub(sString,2,-1))
Break
Case (StrSub(sString,1,2)=='WH')
sString = StrCat('W',StrSub(sString,3,-1))
Break
EndSelect
iStrLen = StrLen(sString)
sMetaphone = ''
For iS=1 To iStrLen
If (StrLen(sMetaphone)>=iCodeLength) Then Break
sChar = StrSub(sString,iS,1)
If (sChar==StrSub(sString,iS+1,1)) Then If (sChar<>'C') Then Continue
Select 1
Case (sChar=='A')
If (iS==1) Then sMetaphone = 'A'
Break
Case (sChar=='B')
If !((iS==iStrLen) && StrSub(sString,iS-1,1)=='M') Then sMetaphone = StrCat(sMetaphone,'B')
Break
Case (sChar=='C')
If (iS>1)
If (StrSub(sString,iS-1,3)=='SCI') Then Continue
If (StrSub(sString,iS-1,3)=='SCE') Then Continue
If (StrSub(sString,iS-1,3)=='SCY') Then Continue
If (StrSub(sString,iS-1,3)=='SCH')
sMetaphone = StrCat(sMetaphone,'K')
iS = iS+1
Continue
EndIf
EndIf
Select 1
Case (StrSub(sString,iS+1,2)=='IA')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+2
Break
Case (StrSub(sString,iS+1,1)=='I')
sMetaphone = StrCat(sMetaphone,'S')
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='E')
sMetaphone = StrCat(sMetaphone,'S')
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='Y')
sMetaphone = StrCat(sMetaphone,'S')
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='H')
If !StrIndex(sVowels, StrSub(sString,iS+2,1),1,@FWDSCAN) ; added for 'K' in 'Ch'ristus ; DD.200200727
sMetaphone = StrCat(sMetaphone,'K') ; added for 'K' in 'Ch'ristus ; DD.200200727
Else
sMetaphone = StrCat(sMetaphone,'X')
EndIf
iS = iS+1
Break
Case 1
sMetaphone = StrCat(sMetaphone,'K')
Break
EndSelect
Break
Case (sChar=='D')
Select 1
Case (StrSub(sString,iS+1,2)=='GE')
sMetaphone = StrCat(sMetaphone,'J')
iS = iS+2
Break
Case (StrSub(sString,iS+1,2)=='GY')
sMetaphone = StrCat(sMetaphone,'J')
iS = iS+2
Break
Case (StrSub(sString,iS+1,2)=='GI')
sMetaphone = StrCat(sMetaphone,'J')
iS = iS+2
Break
Case 1
sMetaphone = StrCat(sMetaphone,'T')
Break
EndSelect
Break
Case (sChar=='E')
If (iS==1) Then sMetaphone = 'E'
Break
Case (sChar=='F')
sMetaphone = StrCat(sMetaphone,'F')
Break
Case (sChar=='G')
Select 1
Case (StrSub(sString,iS+1,1)=='H')
If ((iS+1)==iStrLen) Then sMetaphone = StrCat(sMetaphone,'K')
sCharSub = StrSub(sString,iS+2,1)
If ((sCharSub<>'') && StrIndex(sVowels,sCharSub,1,@FWDSCAN)) Then sMetaphone = StrCat(sMetaphone,'K')
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='N')
iS = iS+1
Break
Case (StrSub(sString,iS+1,3)=='NED')
iS = iS+3
Break
Case (StrSub(sString,iS+1,1)=='I')
If ((iS>1) && StrSub(sString,iS-1,1)=='G')
sMetaphone = StrCat(sMetaphone,'K')
Else
sMetaphone = StrCat(sMetaphone,'J')
EndIf
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='E')
If ((iS>1) && StrSub(sString,iS-1,1)=='G')
sMetaphone = StrCat(sMetaphone,'K')
Else
sMetaphone = StrCat(sMetaphone,'J')
EndIf
iS = iS+1
Break
Case (StrSub(sString,iS+1,1)=='Y')
If ((iS>1) && StrSub(sString,iS-1,1)=='G')
sMetaphone = StrCat(sMetaphone,'K')
Else
sMetaphone = StrCat(sMetaphone,'J')
EndIf
iS = iS+1
Break
Case 1
sMetaphone = StrCat(sMetaphone,'K')
Break
EndSelect
Break
Case (sChar=='H')
OK = 1
If (iS>1)
sCharSub = StrSub(sString,iS-1,1)
If ((sCharSub<>'') && StrIndex(sVowels,sCharSub,1,@FWDSCAN))
sCharSub = StrSub(sString,iS+1,1)
If ((sCharSub<>'') && !(StrIndex(sVowels,sCharSub,1,@FWDSCAN))) Then OK = 0
EndIf
Else
sCharSub = StrSub(sString,iS+1,1)
If ((sCharSub<>'') && !(StrIndex(sVowels,sCharSub,1,@FWDSCAN))) Then OK = 0
EndIf
If OK Then sMetaphone = StrCat(sMetaphone,'H')
Break
Case (sChar=='I')
If (iS==1) Then sMetaphone = 'I'
Break
Case (sChar=='J')
sMetaphone = StrCat(sMetaphone,'J')
Break
Case (sChar=='K')
OK = 1
If (iS>1) Then If (StrSub(sString,iS-1,1)=='C') Then OK = 0
If OK Then sMetaphone = StrCat(sMetaphone,'K')
Break
Case (sChar=='L')
sMetaphone = StrCat(sMetaphone,'L')
Break
Case (sChar=='M')
sMetaphone = StrCat(sMetaphone,'M')
Break
Case (sChar=='N')
sMetaphone = StrCat(sMetaphone,'N')
Break
Case (sChar=='O')
If (iS==1) Then sMetaphone = 'O'
Break
Case (sChar=='P')
If (StrSub(sString,iS+1,1)=='H')
sMetaphone = StrCat(sMetaphone,'F')
iS = iS+1
Else
sMetaphone = StrCat(sMetaphone,'P')
EndIf
Break
Case (sChar=='Q')
sMetaphone = StrCat(sMetaphone,'K')
Break
Case (sChar=='R')
sMetaphone = StrCat(sMetaphone,'R')
Break
Case (sChar=='S')
Select 1
Case (StrSub(sString,iS+1,1)=='H')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+1
Break
Case (StrSub(sString,iS+1,2)=='IO')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+2
Break
Case (StrSub(sString,iS+1,2)=='IA')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+2
Break
Case 1
sMetaphone = StrCat(sMetaphone,'S')
Break
EndSelect
Break
Case (sChar=='T')
Select 1
Case (StrSub(sString,iS+1,2)=='IA')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+2
Break
Case (StrSub(sString,iS+1,2)=='IO')
sMetaphone = StrCat(sMetaphone,'X')
iS = iS+2
Break
Case (StrSub(sString,iS+1,2)=='CH')
Break
Case (StrSub(sString,iS+1,1)=='H')
sMetaphone = StrCat(sMetaphone,'0') ; "TH" = "0" = zero.
iS = iS+1
Break
Case 1
sMetaphone = StrCat(sMetaphone,'T')
Break
EndSelect
Break
Case (sChar=='U')
If (iS==1) Then sMetaphone = 'U'
Break
Case (sChar=='V')
sMetaphone = StrCat(sMetaphone,'F')
Break
Case (sChar=='W')
sCharSub = StrSub(sString,iS+1,1)
If ((sCharSub<>'') && StrIndex(sVowels,sCharSub,1,@FWDSCAN)) Then sMetaphone = StrCat(sMetaphone,'W')
Break
Case (sChar=='X')
sMetaphone = StrCat(sMetaphone,'KS')
Break
Case (sChar=='Y')
sCharSub = StrSub(sString,iS+1,1)
If ((sCharSub<>'') && StrIndex(sVowels,sCharSub,1,@FWDSCAN)) Then sMetaphone = StrCat(sMetaphone,'Y')
Break
Case (sChar=='Z')
sMetaphone = StrCat(sMetaphone,'S')
Break
EndSelect
Next
Return (sMetaphone)
;..........................................................................................................................................
; This Function "udfMetaphone" returns a string with a typical length of 1..4 Byte,
; which represents the phonetic sound of the given input sString,
; based on simple phonetic rules for typical spoken English.
; The Metaphone algorithm does group names together that are more closely related than Soundex does.
;
; The Metaphone Algorithm was first published by Lawrence Philips in article
; "Hanging on the Metaphone", Computer Language v7 n12, December 1990, pp39-43.
; Lawrence Philips has written his Metaphone routine in Pick/BASIC.
;
; In the meantime the code has been ported to numerous other programming language platforms,
; merely bug fixed and often tweaked to fit to the needs of the world.
; A good source for further information is this group at Sourceforge: http://aspell.sourceforge.net/metaphone/
;
; This WinBatch implementation is a port of a D3BASIC script, which relies on the originally 1990 algorithm description
; but provides a more structured and better readable code design than the first published BASIC code.
; The code is provided by "bryanb@webbtide.com", referenced by http://svc423.bne034u.server-web.com/bjb/metaphone.bas.
; The Author's webpage is http://svc423.bne034u.server-web.com/bjbhtml/metaphone.html.
;
; I think that the WinBatch code does what it should do. So use it.
; Detlev Dalitz.20020727
;..........................................................................................................................................
#EndFunction
:skip_udfmetaphone
;------------------------------------------------------------------------------------------------------------------------------------------
; --- test ---
sMetaphone1 = udfMetaphone ("This is WinBatch code.",10) ; "0SSWNBKKT" ; "0" is "th"
sMetaphone2 = udfMetaphone ("Angelika",6) ; "ANJLK"
sMetaphone3 = udfMetaphone ("Christopher",6) ; "KRSTFR"
sMetaphone4 = udfMetaphone ("Detlev",4) ; "TTLF"
sMetaphone5 = udfMetaphone ("Lee",4) ; "L"
sMetaphone6 = udfMetaphone ("McComb",4) ; "MKKM"
sMetaphone7 = udfMetaphone ("Susan",4) ; "SSN"
sMetaphone8 = udfMetaphone ("Souzanna",4) ; "SSN"
sMetaphone9 = udfMetaphone ("School",4) ; "SKL"
sMetaphone10 = udfMetaphone ("Schubert",5) ; "SKBRT" ; german pronounciation fails
sMetaphone11 = udfMetaphone ("Shubert",4) ; "XBRT"
sMetaphone12 = udfMetaphone ("technical",4) ; "TKNK"
sMetaphone13 = udfMetaphone ("chemical",4) ; "XMKL"
sMetaphone14 = udfMetaphone ("Bonner",4) ; "BNR"
sMetaphone15 = udfMetaphone ("Baymore",4) ; "BMR"
sMetaphone16 = udfMetaphone ("Smith",4) ; "SM0" ; "0" is "th"
sMetaphone17 = udfMetaphone ("Saneed",4) ; "SNT"
sMetaphone18 = udfMetaphone ("Van Hoesen",5) ; "FNHSN"
sMetaphone19 = udfMetaphone ("Vincenco",5) ; "FNSNK" ; should be more like "FNSNS"
sMetaphone20 = udfMetaphone ("Christus",5) ; "KRSTS"
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;*EOF*
udfPhonex (sString)
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfphonex",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfphonex
#DefineFunction udfPhonex (sString)
If (sString=="") Then Return ("")
sString = StrTrim(sString)
sString = StrUpper(sString)
; Additional pre-processing for german language
sString = StrReplace(sString, "SCH", "S") ; German special "sch".
sString = StrReplace(sString, "ß", "S") ; German special sharp-s "ß"
; Load a WinBatch Binary Buffer
iStringLength = StrLen(sString)
hBB = BinaryAlloc(iStringLength+1) ; Plus one byte for EndOfString zero byte.
BinaryPokeStr(hBB,0,sString)
; Remove all trailing 'S' characters at the end of the name.
While (83==BinaryPeek(hBB,iStringLength-1)) ; S
iStringLength = iStringLength-1
EndWhile
BinaryPoke(hBB,iStringLength,0) ; set EndOfString using zero byte
; Convert leading letter-pairs as follows: KN to N ; WR to R ; PH to F
iStringPos = 0
Select BinaryPeek2(hBB,iStringPos)
Case 20043 ; KN ; 20043 = Char2Num("K") + 256*Char2Num("N") = 75 + 256*78
BinaryPoke(hBB,0,78) ; N
iStringPos = 1
Break
Case 21079 ; WR
BinaryPoke(hBB,0,82) ; R
iStringPos = 1
Break
Case 18512 ; PH
BinaryPoke(hBB,0,70) ; F
iStringPos = 1
Break
EndSelect
If (iStringPos==0)
; Ignore H first letter
If (72==BinaryPeek(hBB,iStringPos)) Then iStringPos = 1 ; H
; Convert leading single letters: (A), E, I, O, U, Y to A ; K, Q to C ; P to B ; J to G ; V to F ; Z to S.
iAscii = BinaryPeek(hBB,iStringPos)
Select iAscii
Case 65 ; A
Case 69 ; E
Case 73 ; I
Case 79 ; O
Case 86 ; U
Case 89 ; Y
BinaryPoke(hBB,0,65) ; A
Break
Case 75 ; K
Case 81 ; Q
BinaryPoke(hBB,0,67) ; C
Break
Case 80 ; P
BinaryPoke(hBB,0,66) ; B
Break
Case 74 ; J
BinaryPoke(hBB,0,71) ; G
Break
Case 86 ; V
BinaryPoke(hBB,0,70) ; F
Break
Case 90 ; Z
BinaryPoke(hBB,0,83) ; S
Break
EndSelect
EndIf
; Do the coding
iCodePos = 1
While 1
iStringPos = iStringPos+1
If (iStringPos>=iStringLength) Then Break
If (iCodePos>3) Then Break
Select BinaryPeek(hBB,iStringPos)
Case 66 ; B
Case 70 ; F
Case 80 ; P
Case 86 ; V
If (49<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,49) ; code 1
iCodePos = iCodePos+1
EndIf
Break
Case 67 ; C
Case 71 ; G
Case 74 ; J
Case 75 ; K
Case 81 ; Q
Case 83 ; S
Case 88 ; X
Case 90 ; Z
If (50<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,50) ; code 2
iCodePos = iCodePos+1
EndIf
Break
Case 68 ; D
Case 84 ; T
If (67<>BinaryPeek(hBB,iStringPos+1)) ; C
If (51<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,51) ; code 3
iCodePos = iCodePos+1
EndIf
EndIf
Break
Case 76 ; L
Select BinaryPeek(hBB,iStringPos+1)
Case 0 ; End of String
Case 65 ; A
Case 69 ; E
Case 73 ; I
Case 79 ; O
Case 85 ; U
Case 89 ; Y
If (52<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,52) ; code 4
iCodePos = iCodePos+1
EndIf
iStringPos = iStringPos+1
Break
EndSelect
Break
Case 77 ; M
Case 78 ; N
Select BinaryPeek(hBB,iStringPos+1)
Case 68 ; D
Case 71 ; G
iStringPos = iStringPos+1
Break
EndSelect
If (53<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,53) ; code 5
iCodePos = iCodePos+1
EndIf
Break
Case 82 ; R
Select BinaryPeek(hBB,iStringPos+1)
Case 0 ; End of String
Case 65 ; A
Case 69 ; E
Case 73 ; I
Case 79 ; O
Case 85 ; U
Case 89 ; Y
If (54<>BinaryPeek(hBB,iCodePos-1))
BinaryPoke(hBB,iCodePos,54) ; code 6
iCodePos = iCodePos+1
EndIf
iStringPos = iStringPos+1
Break
EndSelect
Break
EndSelect
EndWhile
sPhonex = StrFix(BinaryPeekStr(hBB,0,iCodePos),"0",4)
BinaryFree(hBB)
Return (sPhonex)
;..........................................................................................................................................
; Abstract
; The Phonex name-matching algorithm is a compromise between generality and specificity,
; and achieves a comparatively good overall performance
; when applied to names in the English language.
; Current namematching methods, including the Phonex algorithm,
; fall into one of two categories,
; those which consider the phonetic structure of names,
; and those which consider the names on a character-by-character basis.
; Both of these approaches have advantages and disadvantages that make
; them better or worse when applied to a specific task.
; The phonetic-based approaches (such as Phonex) are somewhat more
; specific to a particular language but implement a better appreciation
; of names that sound similar.
;..........................................................................................................................................
; The overall accuracy of the Phonex method is marginally lower
; (approximately 0.2%) than that of the Soundex method.
; The percentage of true matches determined by the Phonex method is
; approximately 44% higher than that of the Soundex method.
;..........................................................................................................................................
; The Phonex Algorithm
; (Note: See page p21 in "Phonex" PDF documentation.)
;
; The algorithm converts each name to a four-character code,
; which can be used to identify equivalent names,
; and is structured as follows:
; Pre-process the name according to the following rules:
; 1. Remove all trailing 'S' characters at the end of the name.
; 2. Convert leading letter-pairs as follows:
; KN -> N
; WR -> R
; PH -> F
; 3. Convert leading single letters as follows:
; H -> Remove
; E, I, O, U, Y -> A
; K, Q -> C
; P -> B
; J -> G
; V -> F
; Z -> S
; Code the pre-processed name according to the following rules:
; 1. Retain the first letter of the name, and drop all occurrences
; of A, E, H, I, O, U, W, Y in other positions.
; 2. Assign the following numbers to the remaining letters after the first:
; B, F, P, V -> 1
; C, G, J, K, Q, S, X, Z -> 2
; D, T -> 3 If not followed by C.
; L -> 4 If not followed by vowel or end of name.
; M, N -> 5 Ignore next letter if either D or G.
; R -> 6 If not followed by vowel or end of name.
; 3. Ignore the current letter if it has the same code digit as the last character of the code.
; 4. Convert to the form ‘letter, digit, digit, digit’ by adding trailing zeros
; (if there are less than three digits), or by dropping rightmost digits it there are more than three.
; Although the resulting four-character code is identical in format to that produced by the Soundex coding
; algorithm, these two forms are not compatible.
;..........................................................................................................................................
; Note:
; There is a heavy, almost deadly, difference between the descriptions on page p21 and page p27!
; See yourself: Compare the descriptions of cases 'L' and 'R'.
;
; My implementation of the "Phonex" algorithm in WinBatch code relies on the following formal description.
;
; On page p27 in the "Phonex" PDF document a part of the algorithm is described as follows:
;
; The format of the code is the same as for the /soundex/ method (i.e. letter, digit, digit, digit)
; but the manner in which these code characters are determined is slightly different.
; The majority of the original character equivalents are retained,
; using the following translations of letters in to code digits
; (the first letter of the name is treated separately and is added directly to the code as a letter not a digit):
; b, f, p, v -> 1
; c, g, j, k, q, s, x, z -> 2
; d, t -> 3
; l -> 4
; m, n -> 5
; r -> 6
; The Phonex method does, however, slightly modify this coding technique in that the letters
; D’, ‘T’, ‘L’, ‘M’, ‘N’, and ‘R’ are subject to further processing before determining what code digit to add:
; - If a ‘D’ or ‘T’ is followed by a ‘C’, the ‘D’ or ‘T’ is not coded since it is considered that,
; as in ‘TCH’ and ‘CH’, the omission of the ‘D’ or ‘T’ will enable more true matches to be identified.
; - An ‘L’ is only coded if it is followed by a vowel, or it is the last character of a name.
; - If an ‘M’ or ‘N’ is followed by a ‘D’ or ‘G’ the following letter is overwritten with a duplicate of the
; current (which will then be ignored), since these letter combinations are considered to phonetically equivalent.
; - An ‘R’ is only coded if it is followed by a vowel, or it is the last character of a name.
;..........................................................................................................................................
; Reference:
; Phonex documentation in "NameMatching.pdf", Created: 16.04.2001 15:26:54.
; "An Assessment of Name Matching Algorithms"
; A. J. Lait and B. Randell
; Department of Computing Science
; University of Newcastle upon Tyne
; Contact author: Brian.Randell@newcastle.ac.uk
; http://www.cs.ncl.ac.uk/people/brian.randell/
; http://www.cs.ncl.ac.uk/publications/trs/papers/550.pdf
;..........................................................................................................................................
; Detlev Dalitz.20020729
;..........................................................................................................................................
#EndFunction
:skip_udfphonex
;------------------------------------------------------------------------------------------------------------------------------------------
;--- test ---
; Including test of Name Matching Extender.
AddExtender("XNM34I.DLL")
namelist = ""
namelist = StrCat(namelist, "123 test ") ; 1323 = 123 test
namelist = StrCat(namelist,",24/7 test ") ; 2323 = 24/7 test
namelist = StrCat(namelist,",ZYX test ") ; S232 = ZYX test
namelist = StrCat(namelist,",A ") ; A000 = A
namelist = StrCat(namelist,",Lee ") ; L000 = Lee
namelist = StrCat(namelist,",Kuhne ") ; C500 = Kuhne
namelist = StrCat(namelist,",Meyer-Lansky") ; M452 = Meyer-Lansky
namelist = StrCat(namelist,",Oepping ") ; A150 = Oepping
namelist = StrCat(namelist,",Daley ") ; D400 = Daley
namelist = StrCat(namelist,",Dalitz ") ; D432 = Dalitz
namelist = StrCat(namelist,",Duhlitz ") ; D432 = Duhlitz
namelist = StrCat(namelist,",De Ledes ") ; D430 = De Ledes
namelist = StrCat(namelist,",Schüßler ") ; S246 = Schüßler
namelist = StrCat(namelist,",Schmidt ") ; S530 = Schmidt
namelist = StrCat(namelist,",Sandemann ") ; S500 = Sandemann
namelist = StrCat(namelist,",Sinatra ") ; S536 = Sinatra
namelist = StrCat(namelist,",Heinrich ") ; A562 = Heinrich
namelist = StrCat(namelist,",Hammerschlag") ; A524 = Hammerschlag
namelist = StrCat(namelist,",Williams ") ; W450 = Williams
namelist = StrCat(namelist,",Wilms ") ; W500 = Wilms
namelist = StrCat(namelist,",Wilson ") ; W250 = Wilson
namelist = StrCat(namelist,",Worms ") ; W500 = Worms
namelist = StrCat(namelist,",Zedlitz ") ; S343 = Zedlitz
namelist = StrCat(namelist,",Zotteldecke ") ; S320 = Zotteldecke
namelist = StrCat(namelist,",Scherman ") ; S500 = Scherman
namelist = StrCat(namelist,",Schurman ") ; S500 = Schurman
namelist = StrCat(namelist,",Sherman ") ; S500 = Sherman
namelist = StrCat(namelist,",Shireman ") ; S650 = Shireman
namelist = StrCat(namelist,",Shurman ") ; S500 = Shurman
namelist = StrCat(namelist,",Euler ") ; A460 = Euler
namelist = StrCat(namelist,",Ellery ") ; A460 = Ellery
namelist = StrCat(namelist,",Hilbert ") ; A130 = Hilbert
namelist = StrCat(namelist,",Heilbronn ") ; A165 = Heilbronn
namelist = StrCat(namelist,",Gauss ") ; G000 = Gauss
namelist = StrCat(namelist,",Ghosh ") ; G200 = Ghosh
namelist = StrCat(namelist,",Knuth ") ; N300 = Knuth
namelist = StrCat(namelist,",Kant ") ; C530 = Kant
namelist = StrCat(namelist,",Lloyd ") ; L430 = Lloyd
namelist = StrCat(namelist,",Ladd ") ; L300 = Ladd
namelist = StrCat(namelist,",Lukasiewicz ") ; L200 = Lukasiewicz
namelist = StrCat(namelist,",Lissajous ") ; L200 = Lissajous
namelist = StrCat(namelist,",Ashcraft ") ; A261 = Ashcraft
namelist = StrCat(namelist,",Philip ") ; F410 = Philip
namelist = StrCat(namelist,",Fripp ") ; F610 = Fripp
namelist = StrCat(namelist,",Czarkowska ") ; C262 = Czarkowska
namelist = StrCat(namelist,",Hornblower ") ; A514 = Hornblower
namelist = StrCat(namelist,",Looser ") ; L260 = Looser
namecount = ItemCount(namelist,",")
outstr = ""
For i=1 To namecount
nameitem = ItemExtract(i,namelist,",")
outstr = StrCat(outstr,udfPhonex(nameitem)," = ",nameitem,@CR)
; The "XNM34I.DLL" extender runs about 40 times faster.
outstr = StrCat(outstr,nmPhonex(nameitem)," = ",nameitem,@CR)
Next
IntControl(63,200,100,800,900)
IntControl(28,1,0,0,0)
AskItemlist("Demo udfPhonex (sString)",outstr,@CR,@UNSORTED,@SINGLE)
:CANCEL
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfdoublemetaphone",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfdoublemetaphone
#DefineFunction udfDoubleMetaphone (sString, iCodeLength)
If (sString=="") Then Return ("")
sPrimary = ""
sSecondary = ""
; Set the default minimal length to 4 code characters.
iCodeLength = Max(4,iCodeLength)
iLength = StrLen(sString)
iLast = iLength
sString = StrUpper(sString)
iCurrent = 1
; Check for SlavoGermanic fragments
For i=1 To 4
bIsSlavoGermanic = !!StrIndex(sString,ItemExtract(i,"W,K,CZ,WITZ",","),0,@BACKSCAN)
If bIsSlavoGermanic Then Break
Next
; Skip this at beginning of word
If (ItemLocate(StrSub(sString,1,2),"GN,KN,PN,WR,PS",",")) Then iCurrent = iCurrent+1
; Initial "X" is pronounced "Z" e.g. "Xavier"
If (StrSub(sString,1,1)=="X")
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
iCurrent = iCurrent+1
EndIf
; Pad the original string so that we can index beyond the edge of the world
; Five spaces added should be enough.
sString = StrCat(sString," ")
; Main loop
While ((StrLen(sPrimary)<iCodeLength)||(StrLen(sSecondary)<iCodeLength))
If (iCurrent>iLength) Then Break
iChar = Char2Num(StrSub(sString,iCurrent,1))
Switch iChar
Case 65 ; Char2Num("A")
Case 69 ; Char2Num("E")
Case 73 ; Char2Num("I")
Case 79 ; Char2Num("O")
Case 85 ; Char2Num("U")
Case 89 ; Char2Num("Y")
If (iCurrent==1)
; All init vowels now map to "A"
sPrimary = StrCat(sPrimary,"A")
sSecondary = StrCat(sSecondary,"A")
EndIf
iCurrent = iCurrent+1
Break
Case 66 ; Char2Num("B")
; "-mb", e.g. "dumb", already skipped over ...
sPrimary = StrCat(sPrimary,"P")
sSecondary = StrCat(sSecondary,"P")
If (StrSub(sString,iCurrent+1,1)=="B") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 199 ; Char2Num("Ç") ; Ansi=0199 ; Ascii=128
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
iCurrent = iCurrent+1
Break
Case 67 ; Char2Num("C")
; various germanic
If (iCurrent>2)
If !StrIndex("AEIOUY",StrSub(sString,iCurrent-2,1),1,@FWDSCAN) ; if not a vowel
If (StrSub(sString,iCurrent-1,3)=="ACH")
If ( (StrSub(sString,iCurrent+2,1)<>"I") && ( (StrSub(sString,iCurrent+2,1)<>"E") || ItemLocate(StrSub(sString,iCurrent-2,6),"BACHER,MACHER",",") ) )
sPrimary = StrCat(sPrimary,"X") ; primary changed from "K" to "X" ; Detlev Dalitz.20020801
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
EndIf
EndIf
EndIf
; special case "caesar"
If (iCurrent==1)
If (StrSub(sString,iCurrent,6)=="CAESAR")
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; italian "chianti"
If (StrSub(sString,iCurrent,4)=="CHIA")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
If (StrSub(sString,iCurrent,2)=="CH")
; find "michael"
If (iCurrent>1)
If (StrSub(sString,iCurrent,4)=="CHAE")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"X")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; greek roots e.g. "chemistry","chorus"
If (iCurrent==1)
If ( ItemLocate(StrSub(sString,iCurrent+1,5),"HARAC,HARIS",",") || ItemLocate(StrSub(sString,iCurrent+1,3),"HOR,HYM,HIA,HEM",",") )
If (StrSub(sString,1,5)<>"CHORE")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
EndIf
EndIf
; germanic, greek, or otherwise "ch" for "kh" sound
bValue1 = ItemLocate(StrSub(sString,1,4),"VAN ,VON ",",")
bValue2 = (StrSub(sString,1,3)=="SCH")
bValue3 = ItemLocate(StrSub(sString,iCurrent-2,6),"ORCHES,ARCHIT,ORCHID",",") ; 'architect but not 'arch', 'orchestra', 'orchid'
bValue4 = ItemLocate(StrSub(sString,iCurrent+2,1),"T,S",",")
bValue5 = ItemLocate(StrSub(sString,iCurrent-1,1),"A,O,U,E",",")
bValue6 = (iCurrent==1)
bValue7 = ItemLocate(StrSub(sString,iCurrent+2,1),"L,R,N,M,B,H,F,V,W, ",",") ; e.g., 'wachtler', 'wechsler', but not 'tichner'
If ( (bValue1 || bValue2) || bValue3 || bValue4 || ( ( bValue5 || bValue6 ) && bValue7) )
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
Else
If (iCurrent>1)
If (StrSub(sString,1,2)=="MC")
; e.g., "McHugh"
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
Else
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"K")
EndIf
Else
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
EndIf
EndIf
iCurrent = iCurrent+2
Break
EndIf
; e.g. "czerny"
If (StrSub(sString,iCurrent,2)=="CZ")
If (StrSub(sString,iCurrent-2,4)<>"WICZ")
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"X")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; e.g. "focaccia"
If (StrSub(sString,iCurrent+1,3)=="CIA")
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
iCurrent = iCurrent+3
Break
EndIf
; double "C", but not McClellan"
If ( (StrSub(sString,iCurrent,2)=="CC") && !( (iCurrent==2) && (StrSub(sString,1,1)=="M") ) )
; 'bellocchio' but not 'bacchus'
If ( ItemLocate(StrSub(sString,iCurrent+2,1),"I,E,H",",") && !(StrSub(sString,iCurrent+2,2)=="HU") )
; 'accident', 'accede' 'succeed'
If ( ( (iCurrent==2) && (StrSub(sString,iCurrent-1,1)=="A") ) || ItemLocate(StrSub(sString,iCurrent-1,5),"UCCEE,UCCES",",") )
sPrimary = StrCat(sPrimary,"KS")
sSecondary = StrCat(sSecondary,"KS")
Else
; "bacci", "bertucci", other italian
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
EndIf
iCurrent = iCurrent+3
Break
Else
; Pierce's rule
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
EndIf
If ItemLocate(StrSub(sString,iCurrent,2),"CK,CG,CQ",",")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
If ItemLocate(StrSub(sString,iCurrent,2),"CI,CE,CY",",")
; italian vs. english
If ItemLocate(StrSub(sString,iCurrent,3),"CIO,CIE,CIA",",")
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"X")
Else
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
EndIf
iCurrent = iCurrent+2
Break
EndIf
; else
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
; name sent in "mac caffrey", "mac gregor"
If ItemLocate(StrSub(sString,iCurrent+1,2)," C, Q, G",",")
iCurrent = iCurrent+3
Else
If ( ItemLocate(StrSub(sString,iCurrent+1,1),"C,K,Q",",") && !ItemLocate(StrSub(sString,iCurrent+1,2),"CE,CI",",") )
iCurrent = iCurrent+2
Else
iCurrent = iCurrent+1
EndIf
EndIf
Break
Case 68 ; Char2Num("D")
If (StrSub(sString,iCurrent,2)=="DG")
If (ItemLocate(StrSub(sString,iCurrent+2,1),"I,E,Y",","))
; e.g. "edge"
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"J")
iCurrent = iCurrent+3
Break
Else
; e.g. "edgar"
sPrimary = StrCat(sPrimary,"TK")
sSecondary = StrCat(sSecondary,"TK")
iCurrent = iCurrent+2
Break
EndIf
EndIf
If (ItemLocate(StrSub(sString,iCurrent,2),"DT,DD",","))
sPrimary = StrCat(sPrimary,"T")
sSecondary = StrCat(sSecondary,"T")
iCurrent = iCurrent+2
Break
EndIf
; else
sPrimary = StrCat(sPrimary,"T")
sSecondary = StrCat(sSecondary,"T")
iCurrent = iCurrent+1
Break
Case 70 ; Char2Num("F")
sPrimary = StrCat(sPrimary,"F")
sSecondary = StrCat(sSecondary,"F")
If (StrSub(sString,iCurrent+1,1)=="F") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 71 ; Char2Num("G")
If (StrSub(sString,iCurrent+1,1)=="H")
If (iCurrent>1)
If !StrIndex("AEIOUY",StrSub(sString,iCurrent-1,1),1,@FWDSCAN)
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
iCurrent = iCurrent+2
Break
EndIf
EndIf
If (iCurrent<4)
; "ghislane","ghiradelli"
If (iCurrent==1)
If (StrSub(sString,iCurrent+2,1)=="I")
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"J")
Else
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
EndIf
iCurrent = iCurrent+2
Break
EndIf
EndIf
; Parker's rule (with some further refinements)
bValue1 = (iCurrent>2)
bValue2 = ItemLocate(StrSub(sString,iCurrent-2,1),"B,H,D",",") ; e.g., 'hugh'
bValue3 = (iCurrent>3)
bValue4 = ItemLocate(StrSub(sString,iCurrent-3,1),"B,H,D",",") ; e.g., 'bough'
bValue5 = (iCurrent>4)
bValue6 = ItemLocate(StrSub(sString,iCurrent-4,1),"B,H",",") ; e.g., 'broughton'
If ( (bValue1 && bValue2) || (bvalue3 && bValue4) || (bValue5 && bValue6))
iCurrent = iCurrent+2
Break
Else
; e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
If ( (iCurrent>3) && (StrSub(sString,iCurrent-1,1)=="U") && ItemLocate(StrSub(sString,iCurrent-3,1),"C,G,L,R,T",",") )
sPrimary = StrCat(sPrimary,"F")
sSecondary = StrCat(sSecondary,"F")
Else
If ( (iCurrent>1) && (StrSub(sString,iCurrent-1,1)<>"I"))
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
EndIf
EndIf
iCurrent = iCurrent+2
Break
EndIf
EndIf
If (StrSub(sString,iCurrent+1,1)=="N")
If ( (iCurrent==2) && StrIndex("AEIOUY",StrSub(sString,1,1),1,@FWDSCAN) && !bIsSlavoGermanic )
sPrimary = StrCat(sPrimary,"KN")
sSecondary = StrCat(sSecondary,"N")
Else
; not e.g. 'cagney'
If ( (StrSub(sString,iCurrent+2,2)<>"EY") && (StrSub(sString,iCurrent+1,1)<>"Y") && !bIsSlavoGermanic )
sPrimary = StrCat(sPrimary,"N")
sSecondary = StrCat(sSecondary,"KN")
Else
sPrimary = StrCat(sPrimary,"KN")
sSecondary = StrCat(sSecondary,"KN")
EndIf
EndIf
iCurrent = iCurrent+2
Break
EndIf
; "tagliaro"
If !bIsSlavoGermanic
If (StrSub(sString,iCurrent+1,2)=="LI")
sPrimary = StrCat(sPrimary,"KL")
sSecondary = StrCat(sSecondary,"L")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; -ges-,-gep-,-gel- at beginning
If (iCurrent==1)
If (StrSub(sString,iCurrent+1,1)=="Y")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"J")
iCurrent = iCurrent+2
Break
EndIf
If ItemLocate(StrSub(sString,iCurrent+1,2),"ES,EP,EB,EL,EY,IB,IL,IN,IE,EI,ER",",")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"J")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; -ger-,-gy-
If !ItemLocate(StrSub(sString,iCurrent-1,3),"RGY,OGY",",")
If !ItemLocate(StrSub(sString,iCurrent-1,1),"E,I",",")
If !ItemLocate(StrSub(sString,1,6),"DANGER,RANGER,MANGER",",")
If ((StrSub(sString,iCurrent+1,1)=="Y") || ItemLocate(StrSub(sString,iCurrent+1,2),"ER",","))
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"J")
iCurrent = iCurrent+2
Break
EndIf
EndIf
EndIf
EndIf
; italian e.g. "biaggi"
If (ItemLocate(StrSub(sString,iCurrent+1,1),"E,I,Y",",") || ItemLocate(StrSub(sString,iCurrent-1,4),"AGGI,OGGI",","))
; obvious germanic
If ( ItemLocate(StrSub(sString,1,4),"VAN ,VON ",",") || (StrSub(sString,1,3)=="SCH") || (StrSub(sString,iCurrent+1,2)=="ET") )
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
Else
; always soft if french ending
If (StrSub(sString,iCurrent+1,4)=="IER ")
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"J")
Else
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"K")
EndIf
EndIf
iCurrent = iCurrent+2
Break
EndIf
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
If (StrSub(sString,iCurrent+1,1)=="G") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 72 ; Char2Num("H")
; only keep if first & before vowel or btw. 2 vowels
If StrIndex("AEIOUY",StrSub(sString,iCurrent+1,1),1,@FWDSCAN)
If (iCurrent==1)
sPrimary = StrCat(sPrimary,"H")
sSecondary = StrCat(sSecondary,"H")
iCurrent = iCurrent+2
Break
EndIf
If StrIndex("AEIOUY",StrSub(sString,iCurrent-1,1),1,@FWDSCAN)
sPrimary = StrCat(sPrimary,"H")
sSecondary = StrCat(sSecondary,"H")
iCurrent = iCurrent+2
Break
EndIf
EndIf
; also takes care of 'HH'
iCurrent = iCurrent+1
Break
Case 74 ; Char2Num("J")
; obvious spanish, "jose","san jacinto"
If ( (StrSub(sString,iCurrent,4)=="JOSE") || (StrSub(sString,1,4)=="SAN ") )
If ( ( (iCurrent==1) && (StrSub(sString,iCurrent+4,1)==" ") ) || (StrSub(sString,1,4)=="SAN ") )
sPrimary = StrCat(sPrimary,"H")
sSecondary = StrCat(sSecondary,"H")
Else
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"H")
EndIf
iCurrent = iCurrent+1
Break
EndIf
If ( (iCurrent==1) && (StrSub(sString,iCurrent,4)<>"JOSE" ) )
sPrimary = StrCat(sPrimary,"J") ; Yankelovich/Jankelowicz
sSecondary = StrCat(sSecondary,"A")
Else
; spanish pron. of e.g. 'bajador'
If ( StrIndex("AEIOUY",StrSub(sString,iCurrent-1,1),1,@FWDSCAN) && !bIsSlavoGermanic && ItemLocate(StrSub(sString,iCurrent+1,1),"A,O",",") )
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"H")
Else
If (iCurrent==iLast)
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"")
Else
If ( !ItemLocate(StrSub(sString,iCurrent+1,1),"L,T,K,S,N,M,B,Z",",") && !ItemLocate(StrSub(sString,iCurrent-1,1),"S,K,L",",") )
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"J")
EndIf
EndIf
EndIf
EndIf
If (StrSub(sString,iCurrent+1,1)=="J") Then iCurrent = iCurrent+2 ; it could happen
Else iCurrent = iCurrent+1
Break
Case 75 ; Char2Num("K")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
If (StrSub(sString,iCurrent+1,1)=="K") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 76 ; Char2Num("L")
If (StrSub(sString,iCurrent+1,1)=="L")
; spanish e.g. "cabrillo","gallegos"
bValue1 = (iCurrent==(iLength-3))
bValue2 = ItemLocate(StrSub(sString,iCurrent-1,4),"ILLO,ILLA,ALLE",",")
bValue3 = ItemLocate(StrSub(sString,iLast-1,2),"AS,OS",",")
bValue4 = ItemLocate(StrSub(sString,iLast,1),"A,O",",")
bValue5 = (StrSub(sString,iCurrent-1,4)=="ALLE")
If ( (bValue1 && bValue2) || ( (bValue3 || bValue4) && bValue5 ) )
sPrimary = StrCat(sPrimary,"L")
sSecondary = StrCat(sSecondary,"")
iCurrent = iCurrent+2
Break
EndIf
iCurrent = iCurrent+2
Else
iCurrent = iCurrent+1
EndIf
sPrimary = StrCat(sPrimary,"L")
sSecondary = StrCat(sSecondary,"L")
Break
Case 77 ; Char2Num("M")
; "dumb","thumb"
bValue1 = (StrSub(sString,iCurrent-1,3)=="UMB")
bValue2 = ((iCurrent+1)==iLast)
bValue3 = (StrSub(sString,iCurrent+2,2)=="ER")
bValue4 = (StrSub(sString,iCurrent+1,1)=="M")
If ( ( bValue1 && (bValue2 || bValue3) ) || bValue4 ) Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
sPrimary = StrCat(sPrimary,"M")
sSecondary = StrCat(sSecondary,"M")
Break
Case 78 ; Char2Num("N")
sPrimary = StrCat(sPrimary,"N")
sSecondary = StrCat(sSecondary,"N")
If (StrSub(sString,iCurrent+1,1)=="N") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 209 ; Char2Num("Ñ") ; Ansi=0209 ; Ascii=165
sPrimary = StrCat(sPrimary,"N")
sSecondary = StrCat(sSecondary,"N")
iCurrent = iCurrent+1
Break
Case 80 ; Char2Num("P")
If (StrSub(sString,iCurrent+1,1)=="H")
sPrimary = StrCat(sPrimary,"F")
sSecondary = StrCat(sSecondary,"F")
iCurrent = iCurrent+2
Break
EndIf
; also account for "campbell" and "raspberry"
sPrimary = StrCat(sPrimary,"P")
sSecondary = StrCat(sSecondary,"P")
If (ItemLocate(StrSub(sString,iCurrent+1,1),"P,B",",")) Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 81 ; Char2Num("Q")
sPrimary = StrCat(sPrimary,"K")
sSecondary = StrCat(sSecondary,"K")
If (StrSub(sString,iCurrent+1,1)=="Q") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 82 ; Char2Num("R")
; french e.g. "rogier", but exclude "hochmeier"
bValue1 = (iCurrent==iLast)
bValue2 = !bIsSlavoGermanic
bValue3 = (StrSub(sString,iCurrent-2,2)=="IE")
bValue4 = !ItemLocate(StrSub(sString,iCurrent-4,2),"ME,MA",",")
If (bValue1 && bValue2 && bValue3 && bValue4)
sPrimary = StrCat(sPrimary,"")
sSecondary = StrCat(sSecondary,"R")
Else
sPrimary = StrCat(sPrimary,"R")
sSecondary = StrCat(sSecondary,"R")
EndIf
If (StrSub(sString,iCurrent+1,1)=="R") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 83 ; Char2Num("S")
; special cases "island", "isle", "carlisle", "carlysle"
If (ItemLocate(StrSub(sString,iCurrent-1,3),"ISL,YSL",","))
iCurrent = iCurrent+1
Break
EndIf
; special case "sugar-"
If ( (iCurrent==1) && (StrSub(sString,iCurrent,5)=="SUGAR") )
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"S")
iCurrent = iCurrent+1
Break
EndIf
If (StrSub(sString,iCurrent,2)=="SH")
; germanic
If (ItemLocate(StrSub(sString,iCurrent+1,4),"HEIM,HOEK,HOLM,HOLZ",","))
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
Else
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
EndIf
iCurrent = iCurrent+2
Break
EndIf
; italian & armenian
If ( ItemLocate(StrSub(sString,iCurrent,3),"SIO,SIA",",") || (StrSub(sString,iCurrent,4)=="SIAN") )
If !bIsSlavoGermanic
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"X")
Else
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
EndIf
iCurrent = iCurrent+3
Break
EndIf
; german & anglicisations, e.g. "smith" match "schmidt","snider" match "schneider"
; also, -sz- in slavic language altho in hungarian it is pronounced "s"
If ( ( (iCurrent==1) && ItemLocate(StrSub(sString,iCurrent+1,1),"M,N,L,W",",") ) || (StrSub(sString,iCurrent+1,1)=="Z") )
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"X")
If (StrSub(sString,iCurrent+1,1)=="Z") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
EndIf
If (StrSub(sString,iCurrent,2)=="SC")
; Schlesinger's rule
If (StrSub(sString,iCurrent + 2,1)=="H")
; dutch origin, e.g. "school","schooner"
If ItemLocate(StrSub(sString,iCurrent+3,2),"OO,ER,EN,UY,ED,EM",",")
; 'schermerhorn', 'schenker'
If ItemLocate(StrSub(sString,iCurrent+3,2),"ER,EN",",")
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"SK")
Else
sPrimary = StrCat(sPrimary,"SK")
sSecondary = StrCat(sSecondary,"SK")
EndIf
iCurrent = iCurrent+3
Break
Else
If ( (iCurrent==1) && !StrIndex("AEIOUY",StrSub(sString,3,1),1,@FWDSCAN) && (StrSub(sString,iCurrent+3,1)<>"W") )
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"S")
Else
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
EndIf
iCurrent = iCurrent+3
Break
EndIf
EndIf
If (ItemLocate(StrSub(sString,iCurrent+2,1),"I,E,Y",","))
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
iCurrent = iCurrent+3
Break
EndIf
; else
sPrimary = StrCat(sPrimary,"SK")
sSecondary = StrCat(sSecondary,"SK")
iCurrent = iCurrent+3
Break
EndIf
; french e.g. "resnais","artois"
If ( (iCurrent==iLast) && ItemLocate(StrSub(sString,iCurrent-2,2),"AI,OI",",") )
sPrimary = StrCat(sPrimary,"")
sSecondary = StrCat(sSecondary,"S")
Else
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
EndIf
If (ItemLocate(StrSub(sString,iCurrent+1,1),"S,Z",",")) Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 84 ; Char2Num("T")
If (StrSub(sString,iCurrent,4)=="TION")
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
iCurrent = iCurrent+3
Break
EndIf
If (ItemLocate(StrSub(sString,iCurrent,3),"TIA,TCH",","))
sPrimary = StrCat(sPrimary,"X")
sSecondary = StrCat(sSecondary,"X")
iCurrent = iCurrent+3
Break
EndIf
If ( (StrSub(sString,iCurrent,2)=="TH") || (StrSub(sString,iCurrent,3)=="TTH") )
; special case "thomas", "thames" or germanic
If ( ItemLocate(StrSub(sString,iCurrent+2,2),"OM,AM",",") || ItemLocate(StrSub(sString,1,4),"VAN ,VON ",",") || (StrSub(sString,1,3)=="SCH") )
sPrimary = StrCat(sPrimary,"T")
sSecondary = StrCat(sSecondary,"T")
Else
sPrimary = StrCat(sPrimary,"0") ; "0" is "th"
sSecondary = StrCat(sSecondary,"T")
EndIf
iCurrent = iCurrent+2
Break
EndIf
sPrimary = StrCat(sPrimary,"T")
sSecondary = StrCat(sSecondary,"T")
If ItemLocate(StrSub(sString,iCurrent+1,1),"T,D",",") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 86 ; Char2Num("V")
sPrimary = StrCat(sPrimary,"F")
sSecondary = StrCat(sSecondary,"F")
If (StrSub(sString,iCurrent+1,1)=="V") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 87 ; Char2Num("W")
; can also be in middle of word
If (StrSub(sString,iCurrent,2)=="WR")
sPrimary = StrCat(sPrimary,"R")
sSecondary = StrCat(sSecondary,"R")
iCurrent = iCurrent+2
Break
EndIf
If ( (iCurrent==1) && ( StrIndex("AEIOUY",StrSub(sString,iCurrent+1,1),1,@FWDSCAN) || (StrSub(sString,iCurrent,2)=="WH") ) )
; Wasserman should match Vasserman
If StrIndex("AEIOUY",StrSub(sString,iCurrent+1,1),1,@FWDSCAN)
sPrimary = StrCat(sPrimary,"A")
sSecondary = StrCat(sSecondary,"F")
Else
; need Uomo to match Womo
sPrimary = StrCat(sPrimary,"A")
sSecondary = StrCat(sSecondary,"A")
EndIf
EndIf
; Arnow should match Arnoff
bValue1 = (iCurrent==iLast)
bValue2 = StrIndex("AEIOUY",StrSub(sString,iCurrent-1,1),1,@FWDSCAN)
bValue3 = ItemLocate(StrSub(sString,iCurrent-1,5),"EWSKI,EWSKY,OWSKI,OWSKY",",")
bValue4 = (StrSub(sString,1,3)=="SCH")
If ( (bValue1 && bValue2) || bValue3 || bValue4 )
sPrimary = StrCat(sPrimary,"")
sSecondary = StrCat(sSecondary,"F")
iCurrent = iCurrent+1
Break
EndIf
; polish e.g. "filipowicz"
If ItemLocate(StrSub(sString,iCurrent,4),"WICZ,WITZ",",")
sPrimary = StrCat(sPrimary,"TS")
sSecondary = StrCat(sSecondary,"FX")
iCurrent = iCurrent+4
Break
EndIf
; else skip it
iCurrent = iCurrent+1
Break
Case 88 ; Char2Num("X")
; french e.g. breaux
bValue1 = (iCurrent==iLast)
bValue2 = ItemLocate(StrSub(sString,iCurrent-3,3),"IAU,EAU",",")
bValue3 = ItemLocate(StrSub(sString,iCurrent-2,2),"AU,OU",",")
If ( !( bValue1 && (bValue2 || bValue3) ) )
sPrimary = StrCat(sPrimary,"KS")
sSecondary = StrCat(sSecondary,"KS")
EndIf
If ItemLocate(StrSub(sString,iCurrent+1,1),"C,X",",") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case 90 ; Char2Num("Z")
; chinese pinyin e.g. "zhao"
If (StrSub(sString,iCurrent+1,1)=="H")
sPrimary = StrCat(sPrimary,"J")
sSecondary = StrCat(sSecondary,"J")
iCurrent = iCurrent+2
Break
Else
bValue1 = ItemLocate(StrSub(sString,iCurrent+1,2),"ZO,ZI,ZA",",")
bValue2 = bIsSlavoGermanic
bValue3 = (iCurrent>1)
bValue4 = (StrSub(sString,iCurrent-1,1)<>"T")
If ( bValue1 || ( bValue2 && (bValue3 && bValue4) ) )
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"TS")
Else
sPrimary = StrCat(sPrimary,"S")
sSecondary = StrCat(sSecondary,"S")
EndIf
EndIf
If (StrSub(sString,iCurrent+1,1)=="Z") Then iCurrent = iCurrent+2
Else iCurrent = iCurrent+1
Break
Case iChar
iCurrent = iCurrent+1
Break
EndSwitch
EndWhile
sPrimary = StrSub(sPrimary,1,iCodeLength)
sSecondary = StrSub(sSecondary,1,iCodeLength)
Return (StrCat(sPrimary,@TAB,sSecondary))
;..........................................................................................................................................
; This function "udfDoubleMetaphone" returns a list of two key coded strings separated by @TAB delimiter.
; The function gives back the pronounciation of the original input string
; most likely to be heard in the U.S. in the first key, and the native sound in the second key.
;
; This "sounds like" algorithm was developed by Lawrence Philips <lphilips@verity.com> in 1998..1999,
; published in the June, 2000 issue of C/C++ Users Journal, "The Double Metaphone Search Algorithm".
; Double Metaphone is an improved version of Philips' original Metaphone algorithm
; published in Computer Language magazine in 1990.
;
; This WinBatch translation is based on the PHP code from Stephen Woodbridge <woodbri@swoodbridge.com>,
; which incorporates several bug fixes courtesy of Kevin Atkinson <kevina@users.sourceforge.net>,
; which is based heavily on the C implementation by Maurice Aubrey <maurice@hevanet.com>,
; which in turn is based heavily on the C++ implementation by Lawrence Philips.
;..........................................................................................................................................
; Note:
; The logic in the algorithm is particularly frightening (basically given by the problem).
; A giant switch statement analyses the input string character by character,
; and considers secondary conditions from the language environment.
; There are some logic resp. code structures in the original C version,
; and left in this implementation too, which are able to be tuned up for speed,
; even for the WinBatch interpreter.
; I have tried to implement some refinements but as soon as the script blows up rapidly
; and becomes more and more unreadable, I decided to fall back, and encoded the logic
; as near as possible and most similar like the referenced C and PHP scripts.
; There is no guarantee, that the result of this implementation will be compatible with the result of other
; implementations of the Double Metaphone algorithm (because lack of reliable reference test data).
; Needless to say, Double Metaphone will give you a better hit rate on name searches than the previous version.
;..........................................................................................................................................
; Detlev Dalitz.20020802
;..........................................................................................................................................
#EndFunction
:skip_udfdoublemetaphone
;-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
; --- test ---
sMetaphone100= "This is WinBatch code." ; 0SSNPXKT ; TSSNPXKT ; "0" is "th"
sMetaphone99 = "Angelika" ; ANJLK ; ANKLK
sMetaphone98 = "Christopher" ; KRSTFR ; KRSTFR
sMetaphone97 = "McComb" ; MKMP ; MKMP
sMetaphone96 = "Susan" ; SSN ; SSN
sMetaphone95 = "Souzanna" ; SSN ; SSN
sMetaphone94 = "Schubert" ; XPRT ; SPRT
sMetaphone93 = "Shubert" ; XPRT ; XPRT
sMetaphone92 = "technical" ; TKNKL ; TKNKL
sMetaphone91 = "chemical" ; KMKL ; KMKL
sMetaphone90 = "Lee" ; L ; L
sMetaphone89 = "Bonner" ; PNR ; PNR
sMetaphone88 = "Baymore" ; PMR ; PMR
sMetaphone87 = "Saneed" ; SNT ; SNT
sMetaphone86 = "Van Hoesen" ; FNSN ; FNSN
sMetaphone85 = "Vincenco" ; FNSNK ; FNSNK ; should be more like "FNSNS"
sMetaphone84 = "Jürgen von Manger" ; JRJNFNMNKR ; ARKNFNMNJR
sMetaphone83 = "Edgar Wallace" ; ATKRLS ; ATKRLS
sMetaphone82 = "Stuart Granger" ; STRTKRNKR ; STRTKRNJR
sMetaphone81 = "Detlev Dalitz" ; TTLFTLTS ; TTLFTLTS
sMetaphone80 = "Roger Daltry" ; RKRTLTR ; RJRTLTR
sMetaphone79 = "Lukasiewicz" ; LKSTS ; LKSFX
sMetaphone78 = "Van Houten" ; FNTN ; FNTN
sMetaphone77 = "Kuczewski" ; KSSK ; KXFSK
sMetaphone76 = "Bordeaux" ; PRT ; PRT
sMetaphone75 = "Breaux" ; PR ; PR
sMetaphone74 = "Zhao" ; J ; J
sMetaphone73 = "Womo" ; AM ; FM
sMetaphone72 = "Uomo" ; AM ; AM
sMetaphone71 = "Arnoff" ; ARNF ; ARNF
sMetaphone70 = "Arnow" ; ARN ; ARNF
sMetaphone69 = "Filipovicz" ; FLPFS ; FLPFX
sMetaphone68 = "Vasserman" ; FSRMN ; FSRMN
sMetaphone67 = "Wassermann" ; ASRMN ; FSRMN
sMetaphone66 = "Thames" ; TMS ; TMS
sMetaphone65 = "Thomas" ; TMS ; TMS
sMetaphone64 = "school" ; SKL ; SKL ?
sMetaphone63 = "schooner" ; SKNR ; SKNR
sMetaphone62 = "schenker" ; XNKR ; SKNKR
sMetaphone61 = "snider" ; SNTR ; XNTR
sMetaphone60 = "schneider" ; XNTR ; SNTR
sMetaphone59 = "schmidt" ; XMT ; SMT
sMetaphone58 = "smith" ; SM0 ; XMT ; "0" is "th"
sMetaphone57 = "Carlysle" ; KRLL ; KRLL
sMetaphone56 = "Carlisle" ; KRLL ; KRLL
sMetaphone55 = "Eisel" ; ASL ; ASL
sMetaphone54 = "Isle" ; AL ; AL
sMetaphone53 = "Island" ; ALNT ; ALNT
sMetaphone52 = "Rogier" ; RJ ; RJR
sMetaphone51 = "Hochmeier" ; HKMR ; HKMR
sMetaphone50 = "Raspberry" ; RSPR ; RSPR
sMetaphone49 = "Campbell" ; KMPL ; KMPL
sMetaphone48 = "Gallegos" ; KLKS ; KKS
sMetaphone47 = "Cabrillo" ; KPRL ; KPRL
sMetaphone46 = "Señor" ; SNR ; SNR
sMetaphone45 = "Jablunowski" ; JPLNSK ; APLNFSK
sMetaphone44 = "Yankelovich" ; ANKLFX ; ANKLFK
sMetaphone43 = "Jankelowicz" ; JNKLTS ; ANKLFX
sMetaphone42 = "bajador" ; PJTR ; PHTR
sMetaphone41 = "San Jacinto" ; SNHSNT ; SNHSNT
sMetaphone40 = "Jose" ; HS ; HS
sMetaphone39 = "Cagney" ; KKN ; KKN
sMetaphone38 = "Biaggi" ; PJ ; PK
sMetaphone37 = "tagliaro" ; TKLR ; TLR
sMetaphone36 = "McLaughlin" ; MKLFLN ; MKLFLN
sMetaphone35 = "broughton" ; PRTN ; PRTN
sMetaphone34 = "bough" ; P ; P
sMetaphone33 = "hugh" ; H ; H
sMetaphone32 = "ghislane" ; JLN ; JLN
sMetaphone31 = "ghiradelli" ; JRTL ; JRTL
sMetaphone30 = "Toto" ; TT ; TT
sMetaphone29 = "Edgar" ; ATKR ; ATKR
sMetaphone28 = "mac gregor" ; MKRKR ; MKRKR
sMetaphone27 = "mac caffrey" ; MKFR ; MKFR
sMetaphone26 = "McHugh" ; MK ; MK
sMetaphone25 = "Bacchus" ; PKS ; PKS
sMetaphone24 = "Bellocchio" ; PLX ; PLX
sMetaphone23 = "Bertucci" ; PRTX ; PRTX
sMetaphone22 = "Bacci" ; PX ; PX
sMetaphone21 = "Broccoli" ; PRKL ; PRKL
sMetaphone20 = "Focaccia" ; FKX ; FKX
sMetaphone19 = "Czerny" ; SRN ; XRN
sMetaphone18 = "Tucker" ; TKR ; TKR
sMetaphone17 = "Tuchner" ; TKNR ; TKNR
sMetaphone16 = "Tischler" ; TXLR ; TXLR
sMetaphone15 = "Wechsler" ; AKSLR ; FKSLR
sMetaphone14 = "Wachtler" ; AXTLR ; FKTLR
sMetaphone13 = "Orchester" ; ARKSTR ; ARKSTR
sMetaphone12 = "Architekt" ; ARKTKT ; ARKTKT
sMetaphone11 = "chorus" ; KRS ; KRS
sMetaphone10 = "chemistry" ; KMSTR ; KMSTR
sMetaphone9 = "Michaelis" ; MKLS ; MXLS
sMetaphone8 = "Kant" ; KNT ; KNT
sMetaphone7 = "Chianti" ; KNT ; KNT
sMetaphone6 = "Caesar" ; SSR ; SSR
sMetaphone5 = "Aschenbecher" ; AXNPXR ; ASKNPKR
sMetaphone4 = "Achenbacher" ; AXNPXR ; AKNPKR
sMetaphone3 = "Garçon" ; KRSN ; KRSN
sMetaphone2 = "dumb" ; TM ; TM
sMetaphone1 = "Xavier" ; SF ; SFR
BoxOpen("Demo udfDoubleMetaphone (sString, iCodeLength)","")
sFileOut = StrCat(Environment("TEMP"),"\Metaphone.txt")
hFW = FileOpen(sFileOut,"WRITE")
For i=1 To 100
If IsDefined(sMetaphone%i%)
sMetaphone%i% = StrCat(sMetaphone%i%,@TAB,udfDoubleMetaphone(sMetaphone%i%,10))
BoxText(sMetaphone%i%)
FileWrite(hFW,sMetaphone%i%)
EndIf
Next
FileClose(hFW)
BoxShut()
Run("notepad",sFileOut)
Exit
;-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
;*EOF*
udfLevenshtein (sString1, sString2)
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udflevenshtein",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udflevenshtein
#DefineFunction udfLevenshtein (sString1, sString2)
iStrLen1 = StrLen(sString1)
iStrLen2 = StrLen(sString2)
If !iStrLen1 Then Return (iStrLen2)
If !iStrLen2 Then Return (iStrLen1)
If (iStrLen1>255) Then Return (-1) ; see Note at end of function.
If (iStrLen2>255) Then Return (-1) ; see Note at end of function.
;..........................................................................................................................................
; Cleanup procedures, not quite necessary, but useful.
sString1 = StrUpper(sString1)
sString1 = StrClean(sString1,"ÄÅÃÂÁÀ","A",@TRUE,1)
sString1 = StrClean(sString1,"ËÊÉÈ" ,"E",@TRUE,1)
sString1 = StrClean(sString1,"ÏÎÍÌ" ,"I",@TRUE,1)
sString1 = StrClean(sString1,"ÒÓÔÕÖ" ,"O",@TRUE,1)
sString1 = StrClean(sString1,"ÜÛÚÙ" ,"U",@TRUE,1)
sString1 = StrReplace(sString1,"Ç","C")
sString1 = StrReplace(sString1,"Ñ","N")
sString2 = StrUpper(sString2)
sString2 = StrClean(sString2,"ÄÅÃÂÁÀ","A",@TRUE,1)
sString2 = StrClean(sString2,"ËÊÉÈ" ,"E",@TRUE,1)
sString2 = StrClean(sString2,"ÏÎÍÌ" ,"I",@TRUE,1)
sString2 = StrClean(sString2,"ÒÓÔÕÖ" ,"O",@TRUE,1)
sString2 = StrClean(sString2,"ÜÛÚÙ" ,"U",@TRUE,1)
sString2 = StrReplace(sString2,"Ç","C")
sString2 = StrReplace(sString2,"Ñ","N")
sString1 = StrClean(sString1,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","",@TRUE,2)
sString2 = StrClean(sString2,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","",@TRUE,2)
;..........................................................................................................................................
; The Levenshtein algorithm
iStrLen1 = StrLen(sString1)
iStrLen2 = StrLen(sString2)
aArray = ArrDimension(iStrLen1+1,iStrLen2+1)
For iRow=0 To iStrLen1
aArray[iRow,0] = iRow
Next
For iCol=0 To iStrLen2
aArray[0,iCol] = iCol
Next
For iRow=1 To iStrLen1
For iCol=1 To iStrLen2
iCost = StrSub(sString1,iRow,1) <> StrSub(sString2,iCol,1)
iRowPrev = iRow-1
iColPrev = iCol-1
aArray[iRow,iCol] = Min(1+aArray[iRowPrev,iCol],1+aArray[iRow,iColPrev],iCost+aArray[iRowPrev,iColPrev])
Next
Next
iDistance = aArray[iStrLen1,iStrLen2]
Drop(aArray,aString1,aString2)
Return (iDistance)
;..........................................................................................................................................
; This function returns an integer number which indicates the Levenshtein-Distance between the two
; argument strings or -1, if one of the argument strings is longer than the limit of 255 characters
; (255 should be more than enough for name or dictionary comparison).
;
; The Levenshtein distance is defined as the minimal number of characters you have to replace,
; insert or delete to transform sString1 into sString2.
;
; The greater the Levenshtein-Distance, the more different the strings are.
; Levenshtein-Distance is named after the Russian scientist Vladimir Levenshtein,
; who devised the algorithm in 1965.
; In its simplest form the function will take only the two strings as parameter and will calculate
; just the number of insert, replace and delete operations needed to transform sString1 into sString2.
;
; If you can't spell or pronounce Levenshtein, the metric is also sometimes called 'edit distance'.
; The Levenshtein distance algorithm has been used in:
; - Spell checking, - Speech recognition, - DNA analysis, - Plagiarism detection .
;
; Reference: http://www.merriampark.com/ld.htm
;
; I added some character 'cleaning' procedures prior to the specific Levenshtein algorithm.
;
; Detlev Dalitz.20020805.20030212
;..........................................................................................................................................
#EndFunction
:skip_udflevenshtein
;------------------------------------------------------------------------------------------------------------------------------------------
; --- test ---
sString1 = ""
sString2 = ""
iLevenshteinDistance1 = udfLevenshtein(sString1,sString2) ; 0 ; identical
sString1 = ""
sString2 = "Dalitz"
iLevenshteinDistance2 = udfLevenshtein(sString1,sString2) ; 6
sString1 = "GLAVIN"
sString2 = "GLAVIN"
iLevenshteinDistance3 = udfLevenshtein(sString1,sString2) ; 0 ; identical
sString1 = "GLAVINE"
sString2 = "GLAWYN"
iLevenshteinDistance4 = udfLevenshtein(sString1,sString2) ; 3
sString1 = "DeLedes"
sString2 = "Dalitz"
iLevenshteinDistance5 = udfLevenshtein(sString1,sString2) ; 5
sString1 = "Darlitz"
sString2 = "Dalitz"
iLevenshteinDistance6 = udfLevenshtein(sString1,sString2) ; 1 ; high similarity
sString1 = "Dahlicz"
sString2 = "Dalitz"
iLevenshteinDistance7 = udfLevenshtein(sString1,sString2) ; 2
sString1 = "Daley"
sString2 = "Dalitz"
iLevenshteinDistance8 = udfLevenshtein(sString1,sString2) ; 3
sString1 = "Dallwitz"
sString2 = "Dalitz"
iLevenshteinDistance9 = udfLevenshtein(sString1,sString2) ; 2
sString1 = "Duhlitz"
sString2 = "Dalitz"
iLevenshteinDistance10 = udfLevenshtein(sString1,sString2) ; 2
sString1 = "Talfritz"
sString2 = "Dalitz"
iLevenshteinDistance11 = udfLevenshtein(sString1,sString2) ; 3
sString1 = "FORTRAN"
sString2 = "BASIC"
iLevenshteinDistance12 = udfLevenshtein(sString1,sString2) ; 7 ; 'very low similarity'
sString1 = "Alex"
sString2 = "Alexander"
iLevenshteinDistance13 = udfLevenshtein(sString1,sString2) ; 5
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;*EOF*
udfGuthMatch (sString1, sString2, iMode)
;------------------------------------------------------------------------------------------------------------------------------------------
If ItemLocate("udfguthmatch",IntControl(77,103,0,0,0),@TAB) Then Goto skip_udfguthmatch
#DefineFunction udfGuthMatch (sString1, sString2, iMode)
iMode = Min(3,Max(1,iMode))
bMatch = @FALSE
iMatch = 0
If (sString1=="") Then Goto LabelReturn
If (sString2=="") Then Goto LabelReturn
iMatch = 100
sString1 = StrUpper(sString1)
sString2 = StrUpper(sString2)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
;..........................................................................................................................................
; Cleanup procedures, not quite necessary, but useful.
sString1 = StrClean(sString1,"ÄÅÃÂÁÀ","A",@TRUE,1)
sString2 = StrClean(sString2,"ÄÅÃÂÁÀ","A",@TRUE,1)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrClean(sString1,"ËÊÉÈ" ,"E",@TRUE,1)
sString2 = StrClean(sString2,"ËÊÉÈ" ,"E",@TRUE,1)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrClean(sString1,"ÏÎÍÌ" ,"I",@TRUE,1)
sString2 = StrClean(sString2,"ÏÎÍÌ" ,"I",@TRUE,1)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrClean(sString1,"ÒÓÔÕÖ" ,"O",@TRUE,1)
sString2 = StrClean(sString2,"ÒÓÔÕÖ" ,"O",@TRUE,1)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrClean(sString1,"ÜÛÚÙ" ,"U",@TRUE,1)
sString2 = StrClean(sString2,"ÜÛÚÙ" ,"U",@TRUE,1)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrReplace(sString1,"Ç","C")
sString2 = StrReplace(sString2,"Ç","C")
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrReplace(sString1,"Ñ","N")
sString2 = StrReplace(sString2,"Ñ","N")
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
sString1 = StrClean(sString1,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","",@TRUE,2)
sString2 = StrClean(sString2,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","",@TRUE,2)
bMatch = (sString1==sString2)
If bMatch Then Goto LabelReturn
iS1Len = StrLen(sString1)
If !iS1Len Then Goto LabelReturn
iS2Len = StrLen(sString2)
If !iS2Len Then Goto LabelReturn
;..........................................................................................................................................
; The Guth algorithm
bMatch = @TRUE
iMatch = 0
iS1 = 0
iS2 = 0
sChar1 = ""
sChar2 = ""
While 1
If (sChar1<>"") Then If (sChar2<>"") Then If (sChar1==sChar2) Then iMatch = iMatch+1
; case (S1:x+0,S2:x+0)
iS1 = iS1+1
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+0,S2:x+1)
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+0,S2:x+2)
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+0,S2:x-1)
iS2 = iS2-3
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x-1,S2:x+0)
iS1 = iS1-1
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+1,S2:x+0)
iS1 = iS1+2
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+2,S2:x+0)
iS1 = iS1+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+1,S2:x+1)
iS1 = iS1-1
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+2,S2:x+1) ; added by Detlev Dalitz.20020806
iS1 = iS1+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar1 = StrSub(sString1,iS1,1)
If (sChar1==sChar2) Then Continue
; case (S1:x+2,S2:x+2)
iS2 = iS2+1
If (iS1>iS1Len) Then If (iS2>iS2Len) Then Break
sChar2 = StrSub(sString2,iS2,1)
If (sChar1==sChar2) Then Continue
; case no match
bMatch = @FALSE
If (iMode==1) Then Break
iS1 = iS1-1
iS2 = iS2-1
EndWhile
If (iMode<>1) Then iMatch = 100*2*Min(iMatch,iS1Len,iS2Len)/(iS1Len+iS2Len)
:LabelReturn
If (iMode==1) Then Return (bMatch)
If (iMode==2) Then Return (iMatch)
If (iMode==3) Then Return (StrCat(bMatch,@TAB,iMatch))
;..........................................................................................................................................
; This Function "udfGuthMatch" makes use of an algorithm envolved by Gloria J.A. Guth in 1976.
; The description of the algorithm was taken from "An Assessment of Name Matching Algorithms",
; A. J. Lait and B. Randell, "NameMatching.pdf", Created: 16.04.2001 15:26:54.
;
; The algorithm is a letter-by-letter comparison, and has obvious advantages when applied to names of multi-ethnic population.
;
; I added some character 'cleaning' procedures prior to the specific Guth algorithm.
; Also added a compare case (S1:x+2,S2:x+1), which had not exist in the above mentioned referenced "NameMatching.pdf".
;
; The result of the function can be modified by parameter iMode:
; iMode=1 ... Returns a boolean value (@FALSE..@TRUE, 0..1),
; which indicates if the two given strings are matched upon the rules of the modified Guth algorithm.
; iMode=2 ... Returns an integer value (percent number 0..100),
; which indicates how 'good' the given strings are matched together.
; This gives a measure of the degree of similarity.
; iMode=3 ... Returns a list of two items separated by @TAB delimiter, e.g. "1@TAB76" or "0@TAB30",
; which combines the boolean value and the percentage value.
;..........................................................................................................................................
; Note:
; There are some anomalies left in the algorithm:
; "Daley,Dalitz"
; which gives "@FALSE,54 pct.", this maybe @FALSE anywhere, although the similarity value is slightly better than 50 pct..
; That is caused by the given names, which seems to be to short for a reliable comparison.
; "Alex,Alexander"
; which gives "@FALSE,61 pct.", this maybe @TRUE anywhere, because the similarity value is relatively high above 50 pct..
; This is often the case where one element of a name pair is an abbreviated shorter form of the other.
; Those results should be further examinated by some other method, maybe by a soundex or metaphone algorithm.
;..........................................................................................................................................
; Detlev Dalitz.20020806
;..........................................................................................................................................
#EndFunction
:skip_udfguthmatch
;------------------------------------------------------------------------------------------------------------------------------------------
; --- test ---
sString1 = "GLAVIN"
sString2 = "GLAVIN"
iMode = 3
sGuthMatch1 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 100 pct.
sString1 = "GLAVINE"
sString2 = "GLAWYN"
iMode = 3
sGuthMatch2 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 61 pct.
sString1 = "DeLedes"
sString2 = "Dalitz"
iMode = 3
sGuthMatch3 = udfGuthMatch(sString1,sString2,iMode) ; @FALSE 30 pct.
sString1 = "Darlitz"
sString2 = "Dalitz"
iMode = 3
sGuthMatch4 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 92 pct.
sString1 = "Dahlicz"
sString2 = "Dalitz"
iMode = 3
sGuthMatch5 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 76 pct.
sString1 = "Dahley"
sString2 = "Dalitz"
iMode = 3
sGuthMatch6 = udfGuthMatch(sString1,sString2,iMode) ; @FALSE 54 pct.
sString1 = "Dallwitz"
sString2 = "Dalitz"
iMode = 3
sGuthMatch7 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 85 pct.
sString1 = "Duhlitz"
sString2 = "Dalitz"
iMode = 3
sGuthMatch8 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 76 pct.
sString1 = "Talfritz"
sString2 = "Dalitz"
iMode = 3
sGuthMatch9 = udfGuthMatch(sString1,sString2,iMode) ; @TRUE 71 pct.
sString1 = "FORTRAN"
sString2 = "BASIC"
iMode = 3
sGuthMatch10 = udfGuthMatch(sString1,sString2,iMode) ; @FALSE 0 pct.
sString1 = "Alex"
sString2 = "Alexander"
iMode = 3
sGuthMatch11 = udfGuthMatch(sString1,sString2,iMode) ; @FALSE 61 pct.
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;*EOF*