;------------------------------------------------------------------------------------------------------------------------------------------ #DefineFunction udfStrGuthMatch (strString1, strString2, intMode) intMode = Min (3, Max (1, intMode)) blnMatch = @FALSE intMatch = 0 If strString1 == "" Then Goto ReturnMatch If strString2 == "" Then Goto ReturnMatch intMatch = 100 strString1 = StrUpper (strString1) strString2 = StrUpper (strString2) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch ;.......................................................................................................................................... ; Cleanup procedures, not quite necessary, but useful. strString1 = StrClean (strString1, "ÄÅÃÂÁÀ", "A", @TRUE, 1) strString2 = StrClean (strString2, "ÄÅÃÂÁÀ", "A", @TRUE, 1) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrClean (strString1, "ËÊÉÈ", "E", @TRUE, 1) strString2 = StrClean (strString2, "ËÊÉÈ", "E", @TRUE, 1) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrClean (strString1, "ÏÎÍÌ", "I", @TRUE, 1) strString2 = StrClean (strString2, "ÏÎÍÌ", "I", @TRUE, 1) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrClean (strString1, "ÒÓÔÕÖ", "O", @TRUE, 1) strString2 = StrClean (strString2, "ÒÓÔÕÖ", "O", @TRUE, 1) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrClean (strString1, "ÜÛÚÙ", "U", @TRUE, 1) strString2 = StrClean (strString2, "ÜÛÚÙ", "U", @TRUE, 1) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrReplace (strString1, "Ç", "C") strString2 = StrReplace (strString2, "Ç", "C") blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrReplace (strString1, "Ñ", "N") strString2 = StrReplace (strString2, "Ñ", "N") blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch strString1 = StrClean (strString1, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2) strString2 = StrClean (strString2, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2) blnMatch = strString1 == strString2 If blnMatch Then Goto ReturnMatch intS1Len = StrLen (strString1) If !intS1Len Then Goto ReturnMatch intS2Len = StrLen (strString2) If !intS2Len Then Goto ReturnMatch ;.......................................................................................................................................... ; The Guth algorithm. blnMatch = @TRUE intMatch = 0 intS1 = 0 intS2 = 0 strChar1 = "" strChar2 = "" While @TRUE If strChar1 != "" Then If strChar2 != "" Then If strChar1 == strChar2 Then intMatch = intMatch + 1 ; Case (S1:x+0,S2:x+0). intS1 = intS1 + 1 intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+0,S2:x+1). intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+0,S2:x+2). intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+0,S2:x-1). intS2 = intS2 - 3 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x-1,S2:x+0). intS1 = intS1 - 1 intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+1,S2:x+0). intS1 = intS1 + 2 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+2,S2:x+0). intS1 = intS1 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+1,S2:x+1). intS1 = intS1 - 1 intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+2,S2:x+1) ; Added by Detlev Dalitz.20020806. intS1 = intS1 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar1 = StrSub (strString1, intS1, 1) If strChar1 == strChar2 Then Continue ; Case (S1:x+2,S2:x+2). intS2 = intS2 + 1 If intS1 > intS1Len Then If intS2 > intS2Len Then Break strChar2 = StrSub (strString2, intS2, 1) If strChar1 == strChar2 Then Continue ; Case no match. blnMatch = @FALSE If intMode == 1 Then Break intS1 = intS1 - 1 intS2 = intS2 - 1 EndWhile If intMode != 1 Then intMatch = 100 * 2 * Min (intMatch, intS1Len, intS2Len) / (intS1Len + intS2Len) :ReturnMatch If intMode == 1 Then Return blnMatch If intMode == 2 Then Return intMatch If intMode == 3 Then Return blnMatch : @TAB : intMatch ;.......................................................................................................................................... ; This UDF "udfStrGuthMatch" makes use of an algorithm envolved by Gloria J.A. Guth in 1976. ; The description of the algorithm was taken from "An Assessment of Name Matching Algorithms", ; A. J. Lait and B. Randell, "NameMatching.pdf", Created: 16.04.2001 15:26:54. ; ; The algorithm is a letter-by-letter comparison, and has obvious advantages when applied to names of multi-ethnic population. ; ; I have added some character 'cleaning' procedures prior to the specific Guth algorithm. ; Also added a compare case (S1:x+2,S2:x+1), which had not exist in the above mentioned referenced "NameMatching.pdf". ; ; The result of the function can be modified by parameter intMode: ; intMode = 1 ... Returns a boolean value (@FALSE..@TRUE, 0..1), ; which indicates if the two given strings are matched upon the rules of the modified Guth algorithm. ; intMode = 2 ... Returns an integer value (percent number 0..100), ; which indicates how 'good' the given strings are matched together. ; This gives a measure of the degree of similarity. ; intMode = 3 ... Returns a list of two items separated by @TAB delimiter, e. g. "1@TAB76" or "0@TAB30", ; which combines the boolean value and the percentage value. ;.......................................................................................................................................... ; ; Note: ; ; There are some anomalies left in the algorithm: ; ; "Daley,Dalitz" ; which gives "@FALSE,54 pct.", this maybe @FALSE anywhere, although the similarity value is slightly better than 50 pct. ; That is caused by the given names, which seems to be to short for a reliable comparison. ; ; "Alex,Alexander" ; which gives "@FALSE,61 pct.", this maybe @TRUE anywhere, because the similarity value is relatively high above 50 pct. ; This is often the case where one element of a name pair is an abbreviated shorter form of the other. ; Those results should be further examinated by some other method, maybe by a soundex or metaphone algorithm. ;.......................................................................................................................................... ; Detlev Dalitz.20020806.20100207. ;.......................................................................................................................................... #EndFunction ;------------------------------------------------------------------------------------------------------------------------------------------ ; Test. strOut = "Test: udfStrGuthMatch ()" : @CRLF strString1 = "GLAVIN" strString2 = "GLAVIN" intMode = 3 strGuthMatch1 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 100 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch1 : @CRLF : @CRLF strString1 = "GLAVINE" strString2 = "GLAWYN" intMode = 3 strGuthMatch2 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 61 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch2 : @CRLF : @CRLF strString1 = "DeLedes" strString2 = "Dalitz" intMode = 3 strGuthMatch3 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE 30 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch3 : @CRLF : @CRLF strString1 = "Darlitz" strString2 = "Dalitz" intMode = 3 strGuthMatch4 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 92 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch4 : @CRLF : @CRLF strString1 = "Dahlicz" strString2 = "Dalitz" intMode = 3 strGuthMatch5 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 76 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch5 : @CRLF : @CRLF strString1 = "Dahley" strString2 = "Dalitz" intMode = 3 strGuthMatch6 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE 54 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch6 : @CRLF : @CRLF strString1 = "Dallwitz" strString2 = "Dalitz" intMode = 3 strGuthMatch7 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 85 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch7 : @CRLF : @CRLF strString1 = "Duhlitz" strString2 = "Dalitz" intMode = 3 strGuthMatch8 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 76 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch8 : @CRLF : @CRLF strString1 = "Talfritz" strString2 = "Dalitz" intMode = 3 strGuthMatch9 = udfStrGuthMatch (strString1, strString2, intMode) ; @TRUE 71 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch9 : @CRLF : @CRLF strString1 = "FORTRAN" strString2 = "BASIC" intMode = 3 strGuthMatch10 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE 0 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch10 : @CRLF : @CRLF strString1 = "Alex" strString2 = "Alexander" intMode = 3 strGuthMatch11 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE 61 pct. strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch11 : @CRLF : @CRLF strFileOut = ShortCutDir ("Local Settings") : "\Temp\" : StrInsert (StrReplace (TimeYmdHms (), ":", ""), ".", "", 9, 1) : ".FileOut.txt" FilePut (strFileOut, strOut) Run (strFileOut, "") Exit ;------------------------------------------------------------------------------------------------------------------------------------------