;----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- #DefineFunction udfStrDoubleMetaphone (strString, intCodeLength) If strString == "" Then Return "" strPrimary = "" strSecondary = "" ; Set the default minimal length to 4 code characters. intCodeLength = Max (4, intCodeLength) ; Make all characters uppercase. strString = StrUpper (strString) intLength = StrLen (strString) intLast = intLength intPos = 1 ; Check for SlavoGermanic fragments. For intI = 1 To 4 blnIsSlavoGermanic = !!StrIndex (strString, ItemExtract (intI, "W,K,CZ,WITZ", ","), 0, @BACKSCAN) If blnIsSlavoGermanic Then Break Next ; Skip this at beginning of word. If !!ItemLocate (StrSub (strString, 1, 2), "GN,KN,PN,WR,PS", ",") Then intPos = intPos + 1 ; Initial "X" is pronounced "Z" e. g. "Xavier". If StrSub (strString, 1, 1) == "X" strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" intPos = intPos + 1 EndIf ; Pad the original string so that we can index beyond the edge of the world. ; Five trailing spaces should be enough. strString = strString : " " ; Main loop. While (StrLen (strPrimary) < intCodeLength) || (StrLen (strSecondary) < intCodeLength) If intPos > intLength Then Break intChar = Char2Num (StrSub (strString, intPos, 1)) Switch intChar Case 65 ; Char2Num("A"). Case 69 ; Char2Num("E"). Case 73 ; Char2Num("I"). Case 79 ; Char2Num("O"). Case 85 ; Char2Num("U"). Case 89 ; Char2Num("Y"). If intPos == 1 ; All initial vowels now map to "A". strPrimary = strPrimary : "A" strSecondary = strSecondary : "A" EndIf intPos = intPos + 1 Break Case 66 ; Char2Num("B"). ; "-mb", e. g. "dumb", already skipped over ... strPrimary = strPrimary : "P" strSecondary = strSecondary : "P" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "B" Then intPos = intPos + 2 Break Case 199 ; Char2Num("Ç") ; Ansi=0199 ; Ascii=128. strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" intPos = intPos + 1 Break Case 67 ; Char2Num("C"). ; Various germanic. If intPos > 2 If !StrIndex ("AEIOUY", StrSub (strString, intPos - 2, 1), 1, @FWDSCAN) ; If not a vowel. If StrSub (strString, intPos - 1, 3) == "ACH" If (StrSub (strString, intPos + 2, 1) != "I") && ((StrSub (strString, intPos + 2, 1) != "E") || !!ItemLocate (StrSub (strString, intPos - 2, 6), "BACHER,MACHER", ",")) strPrimary = strPrimary : "X" ; Primary changed from "K" to "X" ; Detlev Dalitz.20020801 strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf EndIf EndIf EndIf ; Special case "caesar". If intPos == 1 If StrSub (strString, intPos, 6) == "CAESAR" strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" intPos = intPos + 2 Break EndIf EndIf ; Italian "chianti". If StrSub (strString, intPos, 4) == "CHIA" strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf If StrSub (strString, intPos, 2) == "CH" ; Find "michael". If intPos > 1 If StrSub (strString, intPos, 4) == "CHAE" strPrimary = strPrimary : "K" strSecondary = strSecondary : "X" intPos = intPos + 2 Break EndIf EndIf ; Greek roots e. g. "chemistry", "chorus". If intPos == 1 If !!ItemLocate (StrSub (strString, intPos + 1, 5), "HARAC,HARIS", ",") || !!ItemLocate (StrSub (strString, intPos + 1, 3), "HOR,HYM,HIA,HEM", ",") If StrSub (strString, 1, 5) != "CHORE" strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf EndIf EndIf ; Germanic, greek, or otherwise "ch" for "kh" sound. blnValue1 = !!ItemLocate (StrSub (strString, 1, 4), "VAN ,VON ", ",") blnValue2 = StrSub (strString, 1, 3) == "SCH" blnValue3 = !!ItemLocate (StrSub (strString, intPos - 2, 6), "ORCHES,ARCHIT,ORCHID", ",") ; 'architect but not 'arch', 'orchestra', 'orchid'. blnValue4 = !!ItemLocate (StrSub (strString, intPos + 2, 1), "T,S", ",") blnValue5 = !!ItemLocate (StrSub (strString, intPos - 1, 1), "A,O,U,E", ",") blnValue6 = intPos == 1 blnValue7 = !!ItemLocate (StrSub (strString, intPos + 2, 1), "L,R,N,M,B,H,F,V,W, ", ",") ; e. g., 'wachtler', 'wechsler', but not 'tichner'. If (blnValue1 || blnValue2) || blnValue3 || blnValue4 || ((blnValue5 || blnValue6) && blnValue7) strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" Else If intPos > 1 If StrSub (strString, 1, 2) == "MC" ; e. g., "McHugh" strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" Else strPrimary = strPrimary : "X" strSecondary = strSecondary : "K" EndIf Else strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" EndIf EndIf intPos = intPos + 2 Break EndIf ; e. g. "czerny" If StrSub (strString, intPos, 2) == "CZ" If StrSub (strString, intPos - 2, 4) != "WICZ" strPrimary = strPrimary : "S" strSecondary = strSecondary : "X" intPos = intPos + 2 Break EndIf EndIf ; e. g. "focaccia" If StrSub (strString, intPos + 1, 3) == "CIA" strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" intPos = intPos + 3 Break EndIf ; Double "C", but not McClellan". If (StrSub (strString, intPos, 2) == "CC") && !((intPos == 2) && (StrSub (strString, 1, 1) == "M")) ; 'bellocchio' but not 'bacchus'. If !!ItemLocate (StrSub (strString, intPos + 2, 1), "I,E,H", ",") && !(StrSub (strString, intPos + 2, 2) == "HU") ; 'accident', 'accede' 'succeed'. If ((intPos == 2) && (StrSub (strString, intPos - 1, 1) == "A")) || !!ItemLocate (StrSub (strString, intPos - 1, 5), "UCCEE,UCCES", ",") strPrimary = strPrimary : "KS" strSecondary = strSecondary : "KS" Else ; "bacci", "bertucci", other italian. strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" EndIf intPos = intPos + 3 Break Else ; Pierce's rule. strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf EndIf If !!ItemLocate (StrSub (strString, intPos, 2), "CK,CG,CQ", ",") strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf If !!ItemLocate (StrSub (strString, intPos, 2), "CI,CE,CY", ",") ; Italian vs. English. If !!ItemLocate (StrSub (strString, intPos, 3), "CIO,CIE,CIA", ",") strPrimary = strPrimary : "S" strSecondary = strSecondary : "X" Else strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" EndIf intPos = intPos + 2 Break EndIf ; Else ... strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" ; "mac caffrey", "mac gregor". If !!ItemLocate (StrSub (strString, intPos + 1, 2), " C, Q, G", ",") intPos = intPos + 3 Else If !!ItemLocate (StrSub (strString, intPos + 1, 1), "C,K,Q", ",") && !ItemLocate (StrSub (strString, intPos + 1, 2), "CE,CI", ",") intPos = intPos + 2 Else intPos = intPos + 1 EndIf EndIf Break Case 68 ; Char2Num("D"). If StrSub (strString, intPos, 2) == "DG" If !!ItemLocate (StrSub (strString, intPos + 2, 1), "I,E,Y", ",") ; e. g. "edge" strPrimary = strPrimary : "J" strSecondary = strSecondary : "J" intPos = intPos + 3 Break Else ; e. g. "edgar" strPrimary = strPrimary : "TK" strSecondary = strSecondary : "TK" intPos = intPos + 2 Break EndIf EndIf If !!ItemLocate (StrSub (strString, intPos, 2), "DT,DD", ",") strPrimary = strPrimary : "T" strSecondary = strSecondary : "T" intPos = intPos + 2 Break EndIf ; Else ... strPrimary = strPrimary : "T" strSecondary = strSecondary : "T" intPos = intPos + 1 Break Case 70 ; Char2Num("F"). strPrimary = strPrimary : "F" strSecondary = strSecondary : "F" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "F" Then intPos = intPos + 1 Break Case 71 ; Char2Num("G"). If StrSub (strString, intPos + 1, 1) == "H" If intPos > 1 If !StrIndex ("AEIOUY", StrSub (strString, intPos - 1, 1), 1, @FWDSCAN) strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 2 Break EndIf EndIf If intPos < 4 ; "ghislane","ghiradelli". If intPos == 1 If StrSub (strString, intPos + 2, 1) == "I" strPrimary = strPrimary : "J" strSecondary = strSecondary : "J" Else strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" EndIf intPos = intPos + 2 Break EndIf EndIf ; Parker's rule (with some further refinements). blnValue1 = intPos > 2 blnValue2 = !!ItemLocate (StrSub (strString, intPos - 2, 1), "B,H,D", ",") ; e. g., 'hugh' blnValue3 = intPos > 3 blnValue4 = !!ItemLocate (StrSub (strString, intPos - 3, 1), "B,H,D", ",") ; e. g., 'bough' blnValue5 = intPos > 4 blnValue6 = !!ItemLocate (StrSub (strString, intPos - 4, 1), "B,H", ",") ; e. g., 'broughton' If (blnValue1 && blnValue2) || (blnValue3 && blnValue4) || (blnValue5 && blnValue6) intPos = intPos + 2 Break Else ; e. g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'. If (intPos > 3) && (StrSub (strString, intPos - 1, 1) == "U") && !!ItemLocate (StrSub (strString, intPos - 3, 1), "C,G,L,R,T", ",") strPrimary = strPrimary : "F" strSecondary = strSecondary : "F" Else If (intPos > 1) && (StrSub (strString, intPos - 1, 1) != "I") strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" EndIf EndIf intPos = intPos + 2 Break EndIf EndIf If StrSub (strString, intPos + 1, 1) == "N" If (intPos == 2) && !!StrIndex ("AEIOUY", StrSub (strString, 1, 1), 1, @FWDSCAN) && !blnIsSlavoGermanic strPrimary = strPrimary : "KN" strSecondary = strSecondary : "N" Else ; Not e. g. 'cagney'. If (StrSub (strString, intPos + 2, 2) != "EY") && (StrSub (strString, intPos + 1, 1) != "Y") && !blnIsSlavoGermanic strPrimary = strPrimary : "N" strSecondary = strSecondary : "KN" Else strPrimary = strPrimary : "KN" strSecondary = strSecondary : "KN" EndIf EndIf intPos = intPos + 2 Break EndIf ; "tagliaro". If !blnIsSlavoGermanic If StrSub (strString, intPos + 1, 2) == "LI" strPrimary = strPrimary : "KL" strSecondary = strSecondary : "L" intPos = intPos + 2 Break EndIf EndIf ; -ges-,-gep-,-gel- at beginning. If intPos == 1 If StrSub (strString, intPos + 1, 1) == "Y" strPrimary = strPrimary : "K" strSecondary = strSecondary : "J" intPos = intPos + 2 Break EndIf If !!ItemLocate (StrSub (strString, intPos + 1, 2), "ES,EP,EB,EL,EY,IB,IL,IN,IE,EI,ER", ",") strPrimary = strPrimary : "K" strSecondary = strSecondary : "J" intPos = intPos + 2 Break EndIf EndIf ; -ger-,-gy-. If !ItemLocate (StrSub (strString, intPos - 1, 3), "RGY,OGY", ",") If !ItemLocate (StrSub (strString, intPos - 1, 1), "E,I", ",") If !ItemLocate (StrSub (strString, 1, 6), "DANGER,RANGER,MANGER", ",") If (StrSub (strString, intPos + 1, 1) == "Y") || !!ItemLocate (StrSub (strString, intPos + 1, 2), "ER", ",") strPrimary = strPrimary : "K" strSecondary = strSecondary : "J" intPos = intPos + 2 Break EndIf EndIf EndIf EndIf ; Italian e. g. "biaggi". If !!ItemLocate (StrSub (strString, intPos + 1, 1), "E,I,Y", ",") || !!ItemLocate (StrSub (strString, intPos - 1, 4), "AGGI,OGGI", ",") ; Obvious germanic. If !!ItemLocate (StrSub (strString, 1, 4), "VAN ,VON ", ",") || (StrSub (strString, 1, 3) == "SCH") || (StrSub (strString, intPos + 1, 2) == "ET") strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" Else ; Always soft if french ending. If StrSub (strString, intPos + 1, 4) == "IER " strPrimary = strPrimary : "J" strSecondary = strSecondary : "J" Else strPrimary = strPrimary : "J" strSecondary = strSecondary : "K" EndIf EndIf intPos = intPos + 2 Break EndIf strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "G" Then intPos = intPos + 1 Break Case 72 ; Char2Num("H"). ; Only keep if first & before vowel or btw. 2 vowels. If !!StrIndex ("AEIOUY", StrSub (strString, intPos + 1, 1), 1, @FWDSCAN) If intPos == 1 strPrimary = strPrimary : "H" strSecondary = strSecondary : "H" intPos = intPos + 2 Break EndIf If !!StrIndex ("AEIOUY", StrSub (strString, intPos - 1, 1), 1, @FWDSCAN) strPrimary = strPrimary : "H" strSecondary = strSecondary : "H" intPos = intPos + 2 Break EndIf EndIf ; Also takes care of 'HH'. intPos = intPos + 1 Break Case 74 ; Char2Num("J"). ; Obvious spanish, "jose","san jacinto". If (StrSub (strString, intPos, 4) == "JOSE") || (StrSub (strString, 1, 4) == "SAN ") If ((intPos == 1) && (StrSub (strString, intPos + 4, 1) == " ")) || (StrSub (strString, 1, 4) == "SAN ") strPrimary = strPrimary : "H" strSecondary = strSecondary : "H" Else strPrimary = strPrimary : "J" strSecondary = strSecondary : "H" EndIf intPos = intPos + 1 Break EndIf If (intPos == 1) && (StrSub (strString, intPos, 4) != "JOSE") strPrimary = strPrimary : "J" ; Yankelovich/Jankelowicz. strSecondary = strSecondary : "A" Else ; Spanish pron. of e. g. 'bajador'. If !!StrIndex ("AEIOUY", StrSub (strString, intPos - 1, 1), 1, @FWDSCAN) && !blnIsSlavoGermanic && !!ItemLocate (StrSub (strString, intPos + 1, 1), "A,O", ",") strPrimary = strPrimary : "J" strSecondary = strSecondary : "H" Else If intPos == intLast strPrimary = strPrimary : "J" strSecondary = strSecondary : "" Else If !ItemLocate (StrSub (strString, intPos + 1, 1), "L,T,K,S,N,M,B,Z", ",") && !ItemLocate (StrSub (strString, intPos - 1, 1), "S,K,L", ",") strPrimary = strPrimary : "J" strSecondary = strSecondary : "J" EndIf EndIf EndIf EndIf intPos = intPos + 1 If StrSub (strString, intPos, 1) == "J" Then intPos = intPos + 1 ; It could happen. Break Case 75 ; Char2Num("K"). strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "K" Then intPos = intPos + 1 Break Case 76 ; Char2Num("L"). If StrSub (strString, intPos + 1, 1) == "L" ; spanish e. g. "cabrillo","gallegos" blnValue1 = intPos == (intLength - 3) blnValue2 = !!ItemLocate (StrSub (strString, intPos - 1, 4), "ILLO,ILLA,ALLE", ",") blnValue3 = !!ItemLocate (StrSub (strString, intLast - 1, 2), "AS,OS", ",") blnValue4 = !!ItemLocate (StrSub (strString, intLast, 1), "A,O", ",") blnValue5 = StrSub (strString, intPos - 1, 4) == "ALLE" If (blnValue1 && blnValue2) || ((blnValue3 || blnValue4) && blnValue5) strPrimary = strPrimary : "L" strSecondary = strSecondary : "" intPos = intPos + 2 Break EndIf intPos = intPos + 2 Else intPos = intPos + 1 EndIf strPrimary = strPrimary : "L" strSecondary = strSecondary : "L" Break Case 77 ; Char2Num("M"). ; "dumb","thumb". blnValue1 = StrSub (strString, intPos - 1, 3) == "UMB" blnValue2 = (intPos + 1) == intLast blnValue3 = StrSub (strString, intPos + 2, 2) == "ER" blnValue4 = StrSub (strString, intPos + 1, 1) == "M" intPos = intPos + 1 If (blnValue1 && (blnValue2 || blnValue3)) || blnValue4 Then intPos = intPos + 1 strPrimary = strPrimary : "M" strSecondary = strSecondary : "M" Break Case 78 ; Char2Num("N"). strPrimary = strPrimary : "N" strSecondary = strSecondary : "N" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "N" Then intPos = intPos + 1 Break Case 209 ; Char2Num("Ñ") ; Ansi=0209 ; Ascii=165. strPrimary = strPrimary : "N" strSecondary = strSecondary : "N" intPos = intPos + 1 Break Case 80 ; Char2Num("P"). If StrSub (strString, intPos + 1, 1) == "H" strPrimary = strPrimary : "F" strSecondary = strSecondary : "F" intPos = intPos + 2 Break EndIf ; Also account for "campbell" and "raspberry". strPrimary = strPrimary : "P" strSecondary = strSecondary : "P" intPos = intPos + 1 If !!ItemLocate (StrSub (strString, intPos, 1), "P,B", ",") Then intPos = intPos + 1 Break Case 81 ; Char2Num("Q"). strPrimary = strPrimary : "K" strSecondary = strSecondary : "K" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "Q" Then intPos = intPos + 1 Break Case 82 ; Char2Num("R"). ; French e. g. "rogier", but exclude "hochmeier". blnValue1 = intPos == intLast blnValue2 = !blnIsSlavoGermanic blnValue3 = StrSub (strString, intPos - 2, 2) == "IE" blnValue4 = !ItemLocate (StrSub (strString, intPos - 4, 2), "ME,MA", ",") If blnValue1 && blnValue2 && blnValue3 && blnValue4 strPrimary = strPrimary : "" strSecondary = strSecondary : "R" Else strPrimary = strPrimary : "R" strSecondary = strSecondary : "R" EndIf intPos = intPos + 1 If StrSub (strString, intPos, 1) == "R" Then intPos = intPos + 1 Break Case 83 ; Char2Num("S"). ; Special cases "island", "isle", "carlisle", "carlysle". If !!ItemLocate (StrSub (strString, intPos - 1, 3), "ISL,YSL", ",") intPos = intPos + 1 Break EndIf ; Special case "sugar-". If (intPos == 1) && (StrSub (strString, intPos, 5) == "SUGAR") strPrimary = strPrimary : "X" strSecondary = strSecondary : "S" intPos = intPos + 1 Break EndIf If StrSub (strString, intPos, 2) == "SH" ; Germanic. If !!ItemLocate (StrSub (strString, intPos + 1, 4), "HEIM,HOEK,HOLM,HOLZ", ",") strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" Else strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" EndIf intPos = intPos + 2 Break EndIf ; Italian & Armenian. If !!ItemLocate (StrSub (strString, intPos, 3), "SIO,SIA", ",") || (StrSub (strString, intPos, 4) == "SIAN") If !blnIsSlavoGermanic strPrimary = strPrimary : "S" strSecondary = strSecondary : "X" Else strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" EndIf intPos = intPos + 3 Break EndIf ; German & Anglicisations, e. g. "smith" match "schmidt","snider" match "schneider". ; Also, -sz- in slavic language altho in hungarian it is pronounced "s". If ((intPos == 1) && !!ItemLocate (StrSub (strString, intPos + 1, 1), "M,N,L,W", ",")) || (StrSub (strString, intPos + 1, 1) == "Z") strPrimary = strPrimary : "S" strSecondary = strSecondary : "X" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "Z" Then intPos = intPos + 1 Break EndIf If StrSub (strString, intPos, 2) == "SC" ; Schlesinger's rule. If StrSub (strString, intPos + 2, 1) == "H" ; Dutch origin, e. g. "school","schooner". If !!ItemLocate (StrSub (strString, intPos + 3, 2), "OO,ER,EN,UY,ED,EM", ",") ; 'schermerhorn', 'schenker'. If !!ItemLocate (StrSub (strString, intPos + 3, 2), "ER,EN", ",") strPrimary = strPrimary : "X" strSecondary = strSecondary : "SK" Else strPrimary = strPrimary : "SK" strSecondary = strSecondary : "SK" EndIf intPos = intPos + 3 Break Else If (intPos == 1) && !StrIndex ("AEIOUY", StrSub (strString, 3, 1), 1, @FWDSCAN) && (StrSub (strString, intPos + 3, 1) != "W") strPrimary = strPrimary : "X" strSecondary = strSecondary : "S" Else strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" EndIf intPos = intPos + 3 Break EndIf EndIf If !!ItemLocate (StrSub (strString, intPos + 2, 1), "I,E,Y", ",") strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" intPos = intPos + 3 Break EndIf ; Else ... strPrimary = strPrimary : "SK" strSecondary = strSecondary : "SK" intPos = intPos + 3 Break EndIf ; French e. g. "resnais","artois". If (intPos == intLast) && !!ItemLocate (StrSub (strString, intPos - 2, 2), "AI,OI", ",") strPrimary = strPrimary : "" strSecondary = strSecondary : "S" Else strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" EndIf intPos = intPos + 1 If !!ItemLocate (StrSub (strString, intPos, 1), "S,Z", ",") Then intPos = intPos + 1 Break Case 84 ; Char2Num("T") If StrSub (strString, intPos, 4) == "TION" strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" intPos = intPos + 3 Break EndIf If !!ItemLocate (StrSub (strString, intPos, 3), "TIA,TCH", ",") strPrimary = strPrimary : "X" strSecondary = strSecondary : "X" intPos = intPos + 3 Break EndIf If (StrSub (strString, intPos, 2) == "TH") || (StrSub (strString, intPos, 3) == "TTH") ; Special case "thomas", "thames" or germanic. If !!ItemLocate (StrSub (strString, intPos + 2, 2), "OM,AM", ",") || !!ItemLocate (StrSub (strString, 1, 4), "VAN ,VON ", ",") || (StrSub (strString, 1, 3) == "SCH") strPrimary = strPrimary : "T" strSecondary = strSecondary : "T" Else strPrimary = strPrimary : "0" ; "0" is "th" strSecondary = strSecondary : "T" EndIf intPos = intPos + 2 Break EndIf strPrimary = strPrimary : "T" strSecondary = strSecondary : "T" intPos = intPos + 1 If !!ItemLocate (StrSub (strString, intPos, 1), "T,D", ",") Then intPos = intPos + 1 Break Case 86 ; Char2Num("V") strPrimary = strPrimary : "F" strSecondary = strSecondary : "F" intPos = intPos + 1 If StrSub (strString, intPos, 1) == "V" Then intPos = intPos + 1 Break Case 87 ; Char2Num("W") ; Can also be in middle of word. If StrSub (strString, intPos, 2) == "WR" strPrimary = strPrimary : "R" strSecondary = strSecondary : "R" intPos = intPos + 2 Break EndIf If (intPos == 1) && (!!StrIndex ("AEIOUY", StrSub (strString, intPos + 1, 1), 1, @FWDSCAN) || (StrSub (strString, intPos, 2) == "WH")) ; Wasserman should match Vasserman. If !!StrIndex ("AEIOUY", StrSub (strString, intPos + 1, 1), 1, @FWDSCAN) strPrimary = strPrimary : "A" strSecondary = strSecondary : "F" Else ; Need Uomo to match Womo. strPrimary = strPrimary : "A" strSecondary = strSecondary : "A" EndIf EndIf ; Arnow should match Arnoff. blnValue1 = intPos == intLast blnValue2 = !!StrIndex ("AEIOUY", StrSub (strString, intPos - 1, 1), 1, @FWDSCAN) blnValue3 = !!ItemLocate (StrSub (strString, intPos - 1, 5), "EWSKI,EWSKY,OWSKI,OWSKY", ",") blnValue4 = StrSub (strString, 1, 3) == "SCH" If (blnValue1 && blnValue2) || blnValue3 || blnValue4 strPrimary = strPrimary : "" strSecondary = strSecondary : "F" intPos = intPos + 1 Break EndIf ; Polish e. g. "filipowicz". If !!ItemLocate (StrSub (strString, intPos, 4), "WICZ,WITZ", ",") strPrimary = strPrimary : "TS" strSecondary = strSecondary : "FX" intPos = intPos + 4 Break EndIf ; Else skip it. intPos = intPos + 1 Break Case 88 ; Char2Num("X") ; French e. g. breaux. blnValue1 = intPos == intLast blnValue2 = !!ItemLocate (StrSub (strString, intPos - 3, 3), "IAU,EAU", ",") blnValue3 = !!ItemLocate (StrSub (strString, intPos - 2, 2), "AU,OU", ",") If !(blnValue1 && (blnValue2 || blnValue3)) strPrimary = strPrimary : "KS" strSecondary = strSecondary : "KS" EndIf intPos = intPos + 1 If !!ItemLocate (StrSub (strString, intPos, 1), "C,X", ",") Then intPos = intPos + 1 Break Case 90 ; Char2Num("Z") ; Chinese pinyin e. g. "zhao". If StrSub (strString, intPos + 1, 1) == "H" strPrimary = strPrimary : "J" strSecondary = strSecondary : "J" intPos = intPos + 2 Break Else blnValue1 = !!ItemLocate (StrSub (strString, intPos + 1, 2), "ZO,ZI,ZA", ",") blnValue2 = blnIsSlavoGermanic blnValue3 = intPos > 1 blnValue4 = StrSub (strString, intPos - 1, 1) != "T" If blnValue1 || (blnValue2 && (blnValue3 && blnValue4)) strPrimary = strPrimary : "S" strSecondary = strSecondary : "TS" Else strPrimary = strPrimary : "S" strSecondary = strSecondary : "S" EndIf EndIf intPos = intPos + 1 If StrSub (strString, intPos, 1) == "Z" Then intPos = intPos + 1 Break Case intChar intPos = intPos + 1 Break EndSwitch EndWhile strPrimary = StrSub (strPrimary, 1, intCodeLength) strSecondary = StrSub (strSecondary, 1, intCodeLength) Return strPrimary : @TAB : strSecondary ;.......................................................................................................................................... ; This UDF "udfStrDoubleMetaphone" returns a list of two key coded strings separated by @TAB delimiter. ; The function gives back the pronounciation of the original input string ; most likely to be heard in the U.S. in the first key, and the native sound in the second key. ; ; This "sounds like" algorithm was developed by Lawrence Philips <lphilips@verity.com> in 1998..1999, ; published in the June, 2000 issue of C/C++ Users Journal, "The Double Metaphone Search Algorithm". ; Double Metaphone is an improved version of Philips' original Metaphone algorithm ; published in Computer Language magazine in 1990. ; ; This WinBatch translation is based on the PHP code from Stephen Woodbridge <woodbri@swoodbridge.com>, ; which incorporates several bug fixes courtesy of Kevin Atkinson <kevina@users.sourceforge.net>, ; which is based heavily on the C implementation by Maurice Aubrey <maurice@hevanet.com>, ; which in turn is based heavily on the C++ implementation by Lawrence Philips. ;.......................................................................................................................................... ; Note: ; The logic in the algorithm is particularly frightening (basically given by the problem). ; A giant switch statement analyses the input string character by character, ; and considers secondary conditions from the language environment. ; There are some logic resp. code structures in the original C version, ; and left in this implementation too, which are able to be tuned up for speed, ; even for the WinBatch interpreter. ; ; I have tried to implement some refinements but as soon as the script blows up rapidly ; and becomes more and more unreadable, I decided to fall back, and encoded the logic ; as near as possible and most similar like the referenced C and PHP scripts. ; ; There is no guarantee, that the result of this implementation will be compatible with the result of other ; implementations of the Double Metaphone algorithm (because lack of reliable reference test data). ; Needless to say, Double Metaphone will give you a better hit rate on name searches than the original Metaphone version. ;.......................................................................................................................................... ; Detlev Dalitz.20020802.20100206. ;.......................................................................................................................................... #EndFunction ;----------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ; Test. strList = "This is WinBatch code." ;1 This is WinBatch code. 0SSNPXKT TSSNPXKT ; "0" is "th". strList = strList : @LF : "Detlev Dalitz" ;2 Detlev Dalitz TTLFTLTS TTLFTLTS strList = strList : @LF : "Angelika" ;3 Angelika ANJLK ANKLK strList = strList : @LF : "Christopher" ;4 Christopher KRSTFR KRSTFR strList = strList : @LF : "Sabine" ;5 Sabine SPN SPN strList = strList : @LF : "Daniel" ;6 Daniel TNL TNL strList = strList : @LF : "Margot" ;7 Margot MRKT MRKT strList = strList : @LF : "Otto" ;8 Otto AT AT strList = strList : @LF : "McComb" ;9 McComb MKMP MKMP strList = strList : @LF : "Susan" ;10 Susan SSN SSN strList = strList : @LF : "Souzanna" ;11 Souzanna SSN SSN strList = strList : @LF : "Schubert" ;12 Schubert XPRT SPRT strList = strList : @LF : "Shubert" ;13 Shubert XPRT XPRT strList = strList : @LF : "technical" ;14 technical TKNKL TKNKL strList = strList : @LF : "chemical" ;15 chemical KMKL KMKL strList = strList : @LF : "Lee" ;16 Lee L L strList = strList : @LF : "Bonner" ;17 Bonner PNR PNR strList = strList : @LF : "Baymore" ;18 Baymore PMR PMR strList = strList : @LF : "Saneed" ;19 Saneed SNT SNT strList = strList : @LF : "Van Hoesen" ;20 Van Hoesen FNSN FNSN strList = strList : @LF : "Vincenco" ;21 Vincenco FNSNK FNSNK ; Italian: Should be more like "FNSNS". strList = strList : @LF : "Jürgen von Manger" ;22 Jürgen von Manger JRJNFNMNKR ARKNFNMNJR strList = strList : @LF : "Edgar Wallace" ;23 Edgar Wallace ATKRLS ATKRLS strList = strList : @LF : "Stuart Granger" ;24 Stuart Granger STRTKRNKR STRTKRNJR strList = strList : @LF : "Roger Daltry" ;25 Roger Daltry RKRTLTR RJRTLTR strList = strList : @LF : "Lukasiewicz" ;26 Lukasiewicz LKSTS LKSFX strList = strList : @LF : "Van Houten" ;27 Van Houten FNTN FNTN strList = strList : @LF : "Kuczewski" ;28 Kuczewski KSSK KXFSK strList = strList : @LF : "Bordeaux" ;29 Bordeaux PRT PRT strList = strList : @LF : "Breaux" ;30 Breaux PR PR strList = strList : @LF : "Zhao" ;31 Zhao J J strList = strList : @LF : "Womo" ;32 Womo AM FM strList = strList : @LF : "Uomo" ;33 Uomo AM AM strList = strList : @LF : "Arnoff" ;34 Arnoff ARNF ARNF strList = strList : @LF : "Arnow" ;35 Arnow ARN ARNF strList = strList : @LF : "Filipovicz" ;36 Filipovicz FLPFS FLPFX strList = strList : @LF : "Vasserman" ;37 Vasserman FSRMN FSRMN strList = strList : @LF : "Wassermann" ;38 Wassermann ASRMN FSRMN strList = strList : @LF : "Thames" ;39 Thames TMS TMS strList = strList : @LF : "Thomas" ;40 Thomas TMS TMS strList = strList : @LF : "school" ;41 school SKL SKL strList = strList : @LF : "schooner" ;42 schooner SKNR SKNR strList = strList : @LF : "schenker" ;43 schenker XNKR SKNKR strList = strList : @LF : "snider" ;44 snider SNTR XNTR strList = strList : @LF : "schneider" ;45 schneider XNTR SNTR strList = strList : @LF : "schmidt" ;46 schmidt XMT SMT strList = strList : @LF : "smith" ;47 smith SM0 XMT ; "0" is "th". strList = strList : @LF : "Carlysle" ;48 Carlysle KRLL KRLL strList = strList : @LF : "Carlisle" ;49 Carlisle KRLL KRLL strList = strList : @LF : "Eisel" ;50 Eisel ASL ASL strList = strList : @LF : "Isle" ;51 Isle AL AL strList = strList : @LF : "Island" ;52 Island ALNT ALNT strList = strList : @LF : "Rogier" ;53 Rogier RJ RJR strList = strList : @LF : "Hochmeier" ;54 Hochmeier HKMR HKMR strList = strList : @LF : "Raspberry" ;55 Raspberry RSPR RSPR strList = strList : @LF : "Campbell" ;56 Campbell KMPL KMPL strList = strList : @LF : "Gallegos" ;57 Gallegos KLKS KKS strList = strList : @LF : "Cabrillo" ;58 Cabrillo KPRL KPRL strList = strList : @LF : "Señor" ;59 Señor SNR SNR strList = strList : @LF : "Jablunowski" ;60 Jablunowski JPLNSK APLNFSK strList = strList : @LF : "Yankelovich" ;61 Yankelovich ANKLFX ANKLFK strList = strList : @LF : "Jankelowicz" ;62 Jankelowicz JNKLTS ANKLFX strList = strList : @LF : "bajador" ;63 bajador PJTR PHTR strList = strList : @LF : "San Jacinto" ;64 San Jacinto SNHSNT SNHSNT strList = strList : @LF : "Jose" ;65 Jose HS HS strList = strList : @LF : "Cagney" ;66 Cagney KKN KKN strList = strList : @LF : "Biaggi" ;67 Biaggi PJ PK strList = strList : @LF : "tagliaro" ;68 tagliaro TKLR TLR strList = strList : @LF : "McLaughlin" ;69 McLaughlin MKLFLN MKLFLN strList = strList : @LF : "broughton" ;70 broughton PRTN PRTN strList = strList : @LF : "bough" ;71 bough P P strList = strList : @LF : "hugh" ;72 hugh H H strList = strList : @LF : "ghislane" ;73 ghislane JLN JLN strList = strList : @LF : "ghiradelli" ;74 ghiradelli JRTL JRTL strList = strList : @LF : "Toto" ;75 Toto TT TT strList = strList : @LF : "Edgar" ;76 Edgar ATKR ATKR strList = strList : @LF : "mac gregor" ;77 mac gregor MKRKR MKRKR strList = strList : @LF : "mac caffrey" ;78 mac caffrey MKFR MKFR strList = strList : @LF : "McHugh" ;79 McHugh MK MK strList = strList : @LF : "Bacchus" ;80 Bacchus PKS PKS strList = strList : @LF : "Bellocchio" ;81 Bellocchio PLX PLX strList = strList : @LF : "Bertucci" ;82 Bertucci PRTX PRTX strList = strList : @LF : "Bacci" ;83 Bacci PX PX strList = strList : @LF : "Broccoli" ;84 Broccoli PRKL PRKL strList = strList : @LF : "Focaccia" ;85 Focaccia FKX FKX strList = strList : @LF : "Czerny" ;86 Czerny SRN XRN strList = strList : @LF : "Tucker" ;87 Tucker TKR TKR strList = strList : @LF : "Tuchner" ;88 Tuchner TKNR TKNR strList = strList : @LF : "Tischler" ;89 Tischler TXLR TXLR strList = strList : @LF : "Wechsler" ;90 Wechsler AKSLR FKSLR strList = strList : @LF : "Wachtler" ;91 Wachtler AXTLR FKTLR strList = strList : @LF : "Orchester" ;92 Orchester ARKSTR ARKSTR strList = strList : @LF : "Architekt" ;93 Architekt ARKTKT ARKTKT strList = strList : @LF : "chorus" ;94 chorus KRS KRS strList = strList : @LF : "chemistry" ;95 chemistry KMSTR KMSTR strList = strList : @LF : "Michaelis" ;96 Michaelis MKLS MXLS strList = strList : @LF : "Kant" ;97 Kant KNT KNT strList = strList : @LF : "Chianti" ;98 Chianti KNT KNT strList = strList : @LF : "Caesar" ;99 Caesar SSR SSR strList = strList : @LF : "Aschenbecher" ;100 Aschenbecher AXNPXR ASKNPKR strList = strList : @LF : "Achenbacher" ;101 Achenbacher AXNPXR AKNPKR strList = strList : @LF : "Garçon" ;102 Garçon KRSN KRSN strList = strList : @LF : "dumb" ;103 dumb TM TM strList = strList : @LF : "Xavier" ;104 Xavier SF SFR strList = strList : @LF : "Thrash" ;105 Thrash 0RX TRX ; "0" is "th". strList = strList : @LF : "Matthew" ;106 Matthew M0 MTF ; "0" is "th". strList = strList : @LF : "Matthies" ;107 Matthies M0S MTS ; "0" is "th". strList = strList : @LF : "Christus" ;108 Christus KRSTS KRSTS strFileOut = ShortCutDir ("Local Settings") : "\Temp\" : StrInsert (StrReplace (TimeYmdHms (), ":", ""), ".", "", 9, 1) : ".FileOut.txt" strOut = "" intItem = 0 intCodeLen = 10 BoxOpen ("Demo udfStrDoubleMetaphone (strString, intCodeLength)", "") BoxDataTag (1, 1) ForEach strItem In ObjectType ("ARRAY", Arrayize (strList, @LF)) arrResult = Arrayize (udfStrDoubleMetaphone (strItem, intCodeLen), @TAB) If !ArrInfo (arrResult, 1) Then arrResult = Arrayize (",", ",") intItem = intItem + 1 strOut = strOut : @CRLF : StrFix (intItem, " ", 5) : StrFix (strItem, " ", 30) : @TAB : StrFix (arrResult[0], " ", intCodeLen) : @TAB : StrFix (arrResult[1], " ", intCodeLen) strMsgText = "Item .........: " : intItem : @LF : "Name .........: " : strItem : @LF : "Primary Code .: " : arrResult[0] : @LF : "Secondary Code: " : arrResult[1] BoxTextFont (1, "", 100, 40, 1 | 64) BoxText (strMsgText) BoxDataClear (1, 1) Next BoxShut () FilePut (strFileOut, StrSub (strOut, 3, -1)) Run (strFileOut, "") Exit ;-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------