udfStrSoundex
udfStrSoundexEx
str udfStrSoundex (str)
str udfStrSoundexEx (str)
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfStrSoundex (strString)
If strString == "" Then Return ""

; Make all characters uppercase.
strString = StrUpper(strString)

; Special pre-processing for german language.
;strString = StrReplace (strString, "SCH", "S") ; german special "sch".
;strString = StrReplace (strString, "ß", "S")   ; german special sharp-s "ß".

; Remove all occurrences of non alpha chars.
strString = StrClean (strString, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2)
If strString == "" Then Return ""

; Retain the first letter of the name.
strCharFirst = StrSub (strString, 1, 1)

; Do the coding.
strString = StrClean (strString, "BFPV", "1", @TRUE, 1)
strString = StrClean (strString, "CZSGJKQX", "2", @TRUE, 1)
strString = StrClean (strString, "DT", "3", @TRUE, 1)
strString = StrClean (strString, "L", "4", @TRUE, 1)
strString = StrClean (strString, "NM", "5", @TRUE, 1)
strString = StrClean (strString, "R", "6", @TRUE, 1)

; Shrink same adjacent chars to single char.
strCodes = StrClean ("123456", strString, "", @TRUE, 2)
intLen = StrLen (strCodes)
For intI = 1 To intLen
   strCode = StrSub (strCodes, intI, 1)
   While StrIndex (strString, strCode : strCode, 1, @FWDSCAN)
      strString = StrReplace (strString, strCode : strCode, strCode)
   EndWhile
Next

; Remove all non numericals.
strString = StrClean (strString, "123456", "", @TRUE, 2)

; Respect drop char as first char.
If StrClean (strCharFirst, "AEIOUYWH", "", @TRUE, 1) != "" Then strString = StrSub (strString, 2, -1)

; Format Soundex string.
strSoundex = StrFix (strCharFirst : strString, "0", 4)
Return strSoundex
;..........................................................................................................................................
; Soundex manipulation based on
; U.S. Patents 1261167 (1918), 1435663 (1922)
; by Margaret K. Odell and Robert C. Russel
; as used by the National Archives and Records Administration (NARA)
; (published by Don Knuth [Knuth]).
;
; Russel's method is usable for names from england, america, western europe countries,
; but does not apply well to many Slavic and Yiddish surnames
; and is not independent of several ethnic considerations.
;
; With soundex, the "sound" of names - the phonetic sound to be exact, is coded.
; This is of great help since it avoids most problems of misspellings or alternate spellings.
; For example Scherman, Schurman, Sherman and Shireman and Shurman are indexed together as "S655".
; Surname soundex indexing is not alphabetical, but is listed by the letter-and-number code.
; If several surnames have the same code, their cards are arranged alphabetically by given name.
; Example: S655 Arthur, S655 Betsy, S655 Charles.
;
; Russell Soundex Name-Matching
; The Russell Soundex Code algorithm is designed primarily for use with English names and is a
; phonetically based name matching method. The algorithm converts each name to a four-character code,
; which can be used to identify equivalent names, and is structured as follows [Knuth]:
; 1. Retain the first letter of the name, and drop all occurrences of a, e, h, i, o, u, w, y in other positions.
; 2. Assign the following numbers to the remaining letters after the first:
;    b, f, p, v              == 1
;    c, g, j, k, q, s, x, z  == 2
;    d, t                    == 3
;    l                       == 4
;    m, n                    == 5
;    r                       == 6
; 3. If two or more letters with the same code were adjacent in the original name (before step 1) (!!!),
; omit all but the first.
; 4. Convert to the form ‘letter, digit, digit, digit’ by adding trailing zeros
; (if there are less than three digits), or by dropping rightmost digits it there are more than three.
;
; For example, the names Euler, Gauss, Hilbert, Knuth and Lloyd are given the respective codes
; E460, G200, H416, K530, L300.
; However, the algorithm also gives the same codes for
; Ellery, Ghosh, Heilbronn, Kant and Ladd [Knuth] which are not related in reality.
;
; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392.
;..........................................................................................................................................
; Detlev Dalitz.20020727.20100203.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfStrSoundexEx (strString)
If strString == "" Then Return ""

; Ignore case.
strString = StrLower (strString)

; Special pre-processing for german language
;strString = StrReplace (strString, "sch", "s") ; german special "sch"
;strString = StrReplace (strString, "ß", "s")   ; german special sharp-s "ß"

; Remove all occurrences of non alpha chars.
strString = StrClean (strString, "abcdefghijklmnopqrstuvwxyz", "", @TRUE, 2)
If strString == "" Then Return ""

; Retain the first letter of the name.
strCharFirst = StrSub (strString, 1, 1)

; Do the coding.
strString = StrClean (strString, "bp", "1", @TRUE, 1)
strString = StrClean (strString, "fv", "2", @TRUE, 1)
strString = StrClean (strString, "cks", "3", @TRUE, 1)
strString = StrClean (strString, "gj", "4", @TRUE, 1)
strString = StrClean (strString, "qxz", "5", @TRUE, 1)
strString = StrClean (strString, "dt", "6", @TRUE, 1)
strString = StrClean (strString, "l", "7", @TRUE, 1)
strString = StrClean (strString, "mn", "8", @TRUE, 1)
strString = StrClean (strString, "r", "9", @TRUE, 1)

; Shrink same adjacent chars to single char.
strCodes = StrClean ("123456789", strString, "", @TRUE, 2)
intLen = StrLen (strCodes)
For intI = 1 To intLen
   strCode = StrSub (strCodes, intI, 1)
   While StrIndex (strString, strCode : strCode, 1, @FWDSCAN)
      strString = StrReplace (strString, strCode : strCode, strCode)
   EndWhile
Next

; Remove all non numericals.
strString = StrClean (strString, "123456789", "", @TRUE, 2)

; Respect drop chars.
If StrClean (strCharFirst, "aeiouywh", "", @TRUE, 1) != "" Then strString = StrSub (strString, 2, -1)

; Format Soundex string.
strSoundex = StrFix (StrUpper (strCharFirst) : strString, "0", 5)
Return strSoundex
;..........................................................................................................................................
; The "refined soundex" or "extended soundex".
; SoundexEx manipulation based on
; U.S. Patents 1261167 (1918), 1435663 (1922)
; by Margaret K. Odell and Robert C. Russel
; (published by Don Knuth [Knuth]).
; Russel's method is usable for names from england, america, western europe countries.
; [Knuth]: D. E. Knuth, The Art Of Computer Programming, Vol. 3, Sorting and Searching, Addison Wesley, pp391-392.;
; See detailed annotations at end of User Defined Function "udfSoundEx".
;..........................................................................................................................................
; Detlev Dalitz.20020727.20100203.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

strNames = ""
strNames = strNames : "123 test,24/7 test,ZYX test"
strNames = strNames : ",A,Lee,Kuhne,Meyer-Lansky,Oepping,Daley,Dalitz,Duhlitz,De Ledes"
strNames = strNames : ",Schüßler,Schmidt,Sandemann,Sinatra,Heinrich,Hammerschlag"
strNames = strNames : ",Williams,Wilms,Wilson,Zedlitz,Zotteldecke"
strNames = strNames : ",Scherman,Schurman,Sherman,Shireman,Shurman" ; indexed together as "S655" resp. "S9880"
strNames = strNames : ",Euler,Ellery"              ;  Euler, Ellery -> E460
strNames = strNames : ",Hilbert,Heilbronn"         ;  Hilbert, Heilbronn -> H416
strNames = strNames : ",Gauss,Ghosh"               ;  Gauss, Ghosh -> G200
strNames = strNames : ",Knuth,Kant"                ;  Knuth, Kant -> K530
strNames = strNames : ",Lloyd,Ladd"                ;  Lloyd, Ladd -> L300
strNames = strNames : ",Lukasiewicz,Lissajous"     ;  Lukasiewicz, Lissajous -> L222
strNames = strNames : ",Ashcroft"                  ;  Ashcroft -> A226 ; The US Census used A261 as the Soundex coding for ASHCROFT.
strNames = strNames : ",Czarkowska"                ;  Czarkowska -> C262
strNames = strNames : ",Hornblower"                ;  Hornblower -> H651
strNames = strNames : ",Looser"                    ;  Looser -> L260

intCountNames = ItemCount (strNames, ",")

ClipPut ("")

:Test1
strOut = ""
For intI = 1 To intCountNames
   strName = ItemExtract (intI, strNames, ",")
   strOut = strOut : udfStrSoundex (strName) : " = " : strName : @LF
Next
IntControl (63, 200, 100, 800, 900)
IntControl (28, 1, 0, 0, 0)
AskItemlist ("Demo udfStrSoundex (strString)", strOut, @LF, @UNSORTED, @SINGLE)
ClipAppend (StrReplace (strOut, @LF, @CRLF) : @CRLF)


:Test2
strOut = ""
For intI = 1 To intCountNames
   strName = ItemExtract (intI, strNames, ",")
   strOut = strOut : udfStrSoundexEx (strName) : " = " : strName : @LF
Next
IntControl (63, 200, 100, 800, 900)
IntControl (28, 1, 0, 0, 0)
AskItemlist ("Demo udfStrSoundexEx (strString)", strOut, @LF, @UNSORTED, @SINGLE)
ClipAppend (StrReplace (strOut, @LF, @CRLF) : @CRLF)

:CANCEL
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
;   Soundex
;
;   T230 = 123 test
;   T230 = 24/7 test
;   Z232 = ZYX test
;   A000 = A
;   L000 = Lee
;   K500 = Kuhne
;   M645 = Meyer-Lansky
;   O152 = Oepping
;   D400 = Daley
;   D432 = Dalitz
;   D432 = Duhlitz
;   D432 = De Ledes
;   S460 = Schüßler
;   S530 = Schmidt
;   S535 = Sandemann
;   S536 = Sinatra
;   H562 = Heinrich
;   H562 = Hammerschlag
;   W452 = Williams
;   W452 = Wilms
;   W425 = Wilson
;   Z343 = Zedlitz
;   Z343 = Zotteldecke
;   S655 = Scherman
;   S655 = Schurman
;   S655 = Sherman
;   S655 = Shireman
;   S655 = Shurman
;   E460 = Euler
;   E460 = Ellery
;   H416 = Hilbert
;   H416 = Heilbronn
;   G200 = Gauss
;   G200 = Ghosh
;   K530 = Knuth
;   K530 = Kant
;   L300 = Lloyd
;   L300 = Ladd
;   L222 = Lukasiewicz
;   L222 = Lissajous
;   A226 = Ashcroft
;   C622 = Czarkowska
;   H651 = Hornblower
;   L260 = Looser
;------------------------------------------------------------------------------------------------------------------------------------------
;   SoundexEx
;
;   T3600 = 123 test
;   T3600 = 24/7 test
;   Z5636 = ZYX test
;   A0000 = A
;   L0000 = Lee
;   K8000 = Kuhne
;   M9783 = Meyer-Lansky
;   O1840 = Oepping
;   D7000 = Daley
;   D7650 = Dalitz
;   D7650 = Duhlitz
;   D7630 = De Ledes
;   S7900 = Schüßler
;   S8600 = Schmidt
;   S8688 = Sandemann
;   S8690 = Sinatra
;   H8930 = Heinrich
;   H8937 = Hammerschlag
;   W7830 = Williams
;   W7830 = Wilms
;   W7380 = Wilson
;   Z6765 = Zedlitz
;   Z6763 = Zotteldecke
;   S9880 = Scherman
;   S9880 = Schurman
;   S9880 = Sherman
;   S9880 = Shireman
;   S9880 = Shurman
;   E7900 = Euler
;   E7900 = Ellery
;   H7196 = Hilbert
;   H7198 = Heilbronn
;   G3000 = Gauss
;   G3000 = Ghosh
;   K8600 = Knuth
;   K8600 = Kant
;   L6000 = Lloyd
;   L6000 = Ladd
;   L3335 = Lukasiewicz
;   L3430 = Lissajous
;   A3392 = Ashcroft
;   C5933 = Czarkowska
;   H9817 = Hornblower
;   L3900 = Looser
;------------------------------------------------------------------------------------------------------------------------------------------