;------------------------------------------------------------------------------------------------------------------------------------------ #DefineFunction udfStrSoundex (strString) If strString == "" Then Return "" ; Make all characters uppercase. strString = StrUpper(strString) ; Special pre-processing for german language. ;strString = StrReplace (strString, "SCH", "S") ; german special "sch". ;strString = StrReplace (strString, "ß", "S") ; german special sharp-s "ß". ; Remove all occurrences of non alpha chars. strString = StrClean (strString, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2) If strString == "" Then Return "" ; Retain the first letter of the name. strCharFirst = StrSub (strString, 1, 1) ; Do the coding. strString = StrClean (strString, "BFPV", "1", @TRUE, 1) strString = StrClean (strString, "CZSGJKQX", "2", @TRUE, 1) strString = StrClean (strString, "DT", "3", @TRUE, 1) strString = StrClean (strString, "L", "4", @TRUE, 1) strString = StrClean (strString, "NM", "5", @TRUE, 1) strString = StrClean (strString, "R", "6", @TRUE, 1) ; Shrink same adjacent chars to single char. strCodes = StrClean ("123456", strString, "", @TRUE, 2) intLen = StrLen (strCodes) For intI = 1 To intLen strCode = StrSub (strCodes, intI, 1) While StrIndex (strString, strCode : strCode, 1, @FWDSCAN) strString = StrReplace (strString, strCode : strCode, strCode) EndWhile Next ; Remove all non numericals. strString = StrClean (strString, "123456", "", @TRUE, 2) ; Respect drop char as first char. If StrClean (strCharFirst, "AEIOUYWH", "", @TRUE, 1) != "" Then strString = StrSub (strString, 2, -1) ; Format Soundex string. strSoundex = StrFix (strCharFirst : strString, "0", 4) Return strSoundex ;.......................................................................................................................................... ; Soundex manipulation based on ; U.S. Patents 1261167 (1918), 1435663 (1922) ; by Margaret K. Odell and Robert C. Russel ; as used by the National Archives and Records Administration (NARA) ; (published by Don Knuth [Knuth]). ; ; Russel's method is usable for names from england, america, western europe countries, ; but does not apply well to many Slavic and Yiddish surnames ; and is not independent of several ethnic considerations. ; ; With soundex, the "sound" of names - the phonetic sound to be exact, is coded. ; This is of great help since it avoids most problems of misspellings or alternate spellings. ; For example Scherman, Schurman, Sherman and Shireman and Shurman are indexed together as "S655". ; Surname soundex indexing is not alphabetical, but is listed by the letter-and-number code. ; If several surnames have the same code, their cards are arranged alphabetically by given name. ; Example: S655 Arthur, S655 Betsy, S655 Charles. ; ; Russell Soundex Name-Matching ; The Russell Soundex Code algorithm is designed primarily for use with English names and is a ; phonetically based name matching method. The algorithm converts each name to a four-character code, ; which can be used to identify equivalent names, and is structured as follows [Knuth]: ; 1. Retain the first letter of the name, and drop all occurrences of a, e, h, i, o, u, w, y in other positions. ; 2. Assign the following numbers to the remaining letters after the first: ; b, f, p, v == 1 ; c, g, j, k, q, s, x, z == 2 ; d, t == 3 ; l == 4 ; m, n == 5 ; r == 6 ; 3. If two or more letters with the same code were adjacent in the original name (before step 1) (!!!), ; omit all but the first. ; 4. Convert to the form letter, digit, digit, digit by adding trailing zeros ; (if there are less than three digits), or by dropping rightmost digits it there are more than three. ; ; For example, the names Euler, Gauss, Hilbert, Knuth and Lloyd are given the respective codes ; E460, G200, H416, K530, L300. ; However, the algorithm also gives the same codes for ; Ellery, Ghosh, Heilbronn, Kant and Ladd [Knuth] which are not related in reality. ; ; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392. ;.......................................................................................................................................... ; Detlev Dalitz.20020727.20100203. ;.......................................................................................................................................... #EndFunction ;------------------------------------------------------------------------------------------------------------------------------------------ ;------------------------------------------------------------------------------------------------------------------------------------------ #DefineFunction udfStrSoundexEx (strString) If strString == "" Then Return "" ; Ignore case. strString = StrLower (strString) ; Special pre-processing for german language ;strString = StrReplace (strString, "sch", "s") ; german special "sch" ;strString = StrReplace (strString, "ß", "s") ; german special sharp-s "ß" ; Remove all occurrences of non alpha chars. strString = StrClean (strString, "abcdefghijklmnopqrstuvwxyz", "", @TRUE, 2) If strString == "" Then Return "" ; Retain the first letter of the name. strCharFirst = StrSub (strString, 1, 1) ; Do the coding. strString = StrClean (strString, "bp", "1", @TRUE, 1) strString = StrClean (strString, "fv", "2", @TRUE, 1) strString = StrClean (strString, "cks", "3", @TRUE, 1) strString = StrClean (strString, "gj", "4", @TRUE, 1) strString = StrClean (strString, "qxz", "5", @TRUE, 1) strString = StrClean (strString, "dt", "6", @TRUE, 1) strString = StrClean (strString, "l", "7", @TRUE, 1) strString = StrClean (strString, "mn", "8", @TRUE, 1) strString = StrClean (strString, "r", "9", @TRUE, 1) ; Shrink same adjacent chars to single char. strCodes = StrClean ("123456789", strString, "", @TRUE, 2) intLen = StrLen (strCodes) For intI = 1 To intLen strCode = StrSub (strCodes, intI, 1) While StrIndex (strString, strCode : strCode, 1, @FWDSCAN) strString = StrReplace (strString, strCode : strCode, strCode) EndWhile Next ; Remove all non numericals. strString = StrClean (strString, "123456789", "", @TRUE, 2) ; Respect drop chars. If StrClean (strCharFirst, "aeiouywh", "", @TRUE, 1) != "" Then strString = StrSub (strString, 2, -1) ; Format Soundex string. strSoundex = StrFix (StrUpper (strCharFirst) : strString, "0", 5) Return strSoundex ;.......................................................................................................................................... ; The "refined soundex" or "extended soundex". ; SoundexEx manipulation based on ; U.S. Patents 1261167 (1918), 1435663 (1922) ; by Margaret K. Odell and Robert C. Russel ; (published by Don Knuth [Knuth]). ; Russel's method is usable for names from england, america, western europe countries. ; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392.; ; See detailed annotations at end of User Defined Function "udfSoundEx". ;.......................................................................................................................................... ; Detlev Dalitz.20020727.20100203. ;.......................................................................................................................................... #EndFunction ;------------------------------------------------------------------------------------------------------------------------------------------ ; Test. strNames = "" strNames = strNames : "123 test,24/7 test,ZYX test" strNames = strNames : ",A,Lee,Kuhne,Meyer-Lansky,Oepping,Daley,Dalitz,Duhlitz,De Ledes" strNames = strNames : ",Schüßler,Schmidt,Sandemann,Sinatra,Heinrich,Hammerschlag" strNames = strNames : ",Williams,Wilms,Wilson,Zedlitz,Zotteldecke" strNames = strNames : ",Scherman,Schurman,Sherman,Shireman,Shurman" ; indexed together as "S655" resp. "S9880" strNames = strNames : ",Euler,Ellery" ; Euler, Ellery -> E460 strNames = strNames : ",Hilbert,Heilbronn" ; Hilbert, Heilbronn -> H416 strNames = strNames : ",Gauss,Ghosh" ; Gauss, Ghosh -> G200 strNames = strNames : ",Knuth,Kant" ; Knuth, Kant -> K530 strNames = strNames : ",Lloyd,Ladd" ; Lloyd, Ladd -> L300 strNames = strNames : ",Lukasiewicz,Lissajous" ; Lukasiewicz, Lissajous -> L222 strNames = strNames : ",Ashcroft" ; Ashcroft -> A226 ; The US Census used A261 as the Soundex coding for ASHCROFT. strNames = strNames : ",Czarkowska" ; Czarkowska -> C262 strNames = strNames : ",Hornblower" ; Hornblower -> H651 strNames = strNames : ",Looser" ; Looser -> L260 intCountNames = ItemCount (strNames, ",") ClipPut ("") :Test1 strOut = "" For intI = 1 To intCountNames strName = ItemExtract (intI, strNames, ",") strOut = strOut : udfStrSoundex (strName) : " = " : strName : @LF Next IntControl (63, 200, 100, 800, 900) IntControl (28, 1, 0, 0, 0) AskItemlist ("Demo udfStrSoundex (strString)", strOut, @LF, @UNSORTED, @SINGLE) ClipAppend (StrReplace (strOut, @LF, @CRLF) : @CRLF) :Test2 strOut = "" For intI = 1 To intCountNames strName = ItemExtract (intI, strNames, ",") strOut = strOut : udfStrSoundexEx (strName) : " = " : strName : @LF Next IntControl (63, 200, 100, 800, 900) IntControl (28, 1, 0, 0, 0) AskItemlist ("Demo udfStrSoundexEx (strString)", strOut, @LF, @UNSORTED, @SINGLE) ClipAppend (StrReplace (strOut, @LF, @CRLF) : @CRLF) :CANCEL Exit ;------------------------------------------------------------------------------------------------------------------------------------------ ; Soundex ; ; T230 = 123 test ; T230 = 24/7 test ; Z232 = ZYX test ; A000 = A ; L000 = Lee ; K500 = Kuhne ; M645 = Meyer-Lansky ; O152 = Oepping ; D400 = Daley ; D432 = Dalitz ; D432 = Duhlitz ; D432 = De Ledes ; S460 = Schüßler ; S530 = Schmidt ; S535 = Sandemann ; S536 = Sinatra ; H562 = Heinrich ; H562 = Hammerschlag ; W452 = Williams ; W452 = Wilms ; W425 = Wilson ; Z343 = Zedlitz ; Z343 = Zotteldecke ; S655 = Scherman ; S655 = Schurman ; S655 = Sherman ; S655 = Shireman ; S655 = Shurman ; E460 = Euler ; E460 = Ellery ; H416 = Hilbert ; H416 = Heilbronn ; G200 = Gauss ; G200 = Ghosh ; K530 = Knuth ; K530 = Kant ; L300 = Lloyd ; L300 = Ladd ; L222 = Lukasiewicz ; L222 = Lissajous ; A226 = Ashcroft ; C622 = Czarkowska ; H651 = Hornblower ; L260 = Looser ;------------------------------------------------------------------------------------------------------------------------------------------ ; SoundexEx ; ; T3600 = 123 test ; T3600 = 24/7 test ; Z5636 = ZYX test ; A0000 = A ; L0000 = Lee ; K8000 = Kuhne ; M9783 = Meyer-Lansky ; O1840 = Oepping ; D7000 = Daley ; D7650 = Dalitz ; D7650 = Duhlitz ; D7630 = De Ledes ; S7900 = Schüßler ; S8600 = Schmidt ; S8688 = Sandemann ; S8690 = Sinatra ; H8930 = Heinrich ; H8937 = Hammerschlag ; W7830 = Williams ; W7830 = Wilms ; W7380 = Wilson ; Z6765 = Zedlitz ; Z6763 = Zotteldecke ; S9880 = Scherman ; S9880 = Schurman ; S9880 = Sherman ; S9880 = Shireman ; S9880 = Shurman ; E7900 = Euler ; E7900 = Ellery ; H7196 = Hilbert ; H7198 = Heilbronn ; G3000 = Gauss ; G3000 = Ghosh ; K8600 = Knuth ; K8600 = Kant ; L6000 = Lloyd ; L6000 = Ladd ; L3335 = Lukasiewicz ; L3430 = Lissajous ; A3392 = Ashcroft ; C5933 = Czarkowska ; H9817 = Hornblower ; L3900 = Looser ;------------------------------------------------------------------------------------------------------------------------------------------