udfStrGuthMatch
str udfStrGuthMatch (str, str, int)
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfStrGuthMatch (strString1, strString2, intMode)
intMode = Min (3, Max (1, intMode))

blnMatch = @FALSE
intMatch = 0

If strString1 == "" Then Goto ReturnMatch
If strString2 == "" Then Goto ReturnMatch

intMatch = 100

strString1 = StrUpper (strString1)
strString2 = StrUpper (strString2)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

;..........................................................................................................................................
; Cleanup procedures, not quite necessary, but useful.
strString1 = StrClean (strString1, "ÄÅÃÂÁÀ", "A", @TRUE, 1)
strString2 = StrClean (strString2, "ÄÅÃÂÁÀ", "A", @TRUE, 1)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrClean (strString1, "ËÊÉÈ", "E", @TRUE, 1)
strString2 = StrClean (strString2, "ËÊÉÈ", "E", @TRUE, 1)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrClean (strString1, "ÏÎÍÌ", "I", @TRUE, 1)
strString2 = StrClean (strString2, "ÏÎÍÌ", "I", @TRUE, 1)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrClean (strString1, "ÒÓÔÕÖ", "O", @TRUE, 1)
strString2 = StrClean (strString2, "ÒÓÔÕÖ", "O", @TRUE, 1)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrClean (strString1, "ÜÛÚÙ", "U", @TRUE, 1)
strString2 = StrClean (strString2, "ÜÛÚÙ", "U", @TRUE, 1)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrReplace (strString1, "Ç", "C")
strString2 = StrReplace (strString2, "Ç", "C")
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrReplace (strString1, "Ñ", "N")
strString2 = StrReplace (strString2, "Ñ", "N")
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

strString1 = StrClean (strString1, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2)
strString2 = StrClean (strString2, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "", @TRUE, 2)
blnMatch = strString1 == strString2
If blnMatch Then Goto ReturnMatch

intS1Len = StrLen (strString1)
If !intS1Len Then Goto ReturnMatch
intS2Len = StrLen (strString2)
If !intS2Len Then Goto ReturnMatch
;..........................................................................................................................................

; The Guth algorithm.

blnMatch = @TRUE
intMatch = 0
intS1 = 0
intS2 = 0
strChar1 = ""
strChar2 = ""
While @TRUE
   If strChar1 != "" Then If strChar2 != "" Then If strChar1 == strChar2 Then intMatch = intMatch + 1

   ; Case (S1:x+0,S2:x+0).
   intS1 = intS1 + 1
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+0,S2:x+1).
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+0,S2:x+2).
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+0,S2:x-1).
   intS2 = intS2 - 3
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x-1,S2:x+0).
   intS1 = intS1 - 1
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+1,S2:x+0).
   intS1 = intS1 + 2
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+2,S2:x+0).
   intS1 = intS1 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+1,S2:x+1).
   intS1 = intS1 - 1
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+2,S2:x+1) ; Added by Detlev Dalitz.20020806.
   intS1 = intS1 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar1 = StrSub (strString1, intS1, 1)
   If strChar1 == strChar2 Then Continue

   ; Case (S1:x+2,S2:x+2).
   intS2 = intS2 + 1
   If intS1 > intS1Len Then If intS2 > intS2Len Then Break
   strChar2 = StrSub (strString2, intS2, 1)
   If strChar1 == strChar2 Then Continue

   ; Case no match.
   blnMatch = @FALSE
   If intMode == 1 Then Break
   intS1 = intS1 - 1
   intS2 = intS2 - 1
EndWhile

If intMode != 1 Then intMatch = 100 * 2 * Min (intMatch, intS1Len, intS2Len) / (intS1Len + intS2Len)

:ReturnMatch
If intMode == 1 Then Return blnMatch
If intMode == 2 Then Return intMatch
If intMode == 3 Then Return blnMatch : @TAB : intMatch
;..........................................................................................................................................
; This UDF "udfStrGuthMatch" makes use of an algorithm envolved by Gloria J.A. Guth in 1976.
; The description of the algorithm was taken from "An Assessment of Name Matching Algorithms",
; A. J. Lait and B. Randell, "NameMatching.pdf", Created: 16.04.2001 15:26:54.
;
; The algorithm is a letter-by-letter comparison, and has obvious advantages when applied to names of multi-ethnic population.
;
; I have added some character 'cleaning' procedures prior to the specific Guth algorithm.
; Also added a compare case (S1:x+2,S2:x+1), which had not exist in the above mentioned referenced "NameMatching.pdf".
;
; The result of the function can be modified by parameter intMode:
; intMode = 1 ... Returns a boolean value (@FALSE..@TRUE, 0..1),
;                 which indicates if the two given strings are matched upon the rules of the modified Guth algorithm.
; intMode = 2 ... Returns an integer value (percent number 0..100),
;                 which indicates how 'good' the given strings are matched together.
;                 This gives a measure of the degree of similarity.
; intMode = 3 ... Returns a list of two items separated by @TAB delimiter, e. g. "1@TAB76" or "0@TAB30",
;                 which combines the boolean value and the percentage value.
;..........................................................................................................................................
;
; Note:
;
; There are some anomalies left in the algorithm:
;
; "Daley,Dalitz"
;    which gives "@FALSE,54 pct.", this maybe @FALSE anywhere, although the similarity value is slightly better than 50 pct.
;    That is caused by the given names, which seems to be to short for a reliable comparison.
;
; "Alex,Alexander"
;    which gives "@FALSE,61 pct.", this maybe @TRUE anywhere, because the similarity value is relatively high above 50 pct.
;    This is often the case where one element of a name pair is an abbreviated shorter form of the other.
;    Those results should be further examinated by some other method, maybe by a soundex or metaphone algorithm.
;..........................................................................................................................................
; Detlev Dalitz.20020806.20100207.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

strOut = "Test: udfStrGuthMatch ()" : @CRLF

strString1 = "GLAVIN"
strString2 = "GLAVIN"
intMode = 3
strGuthMatch1 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE   100 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch1 : @CRLF : @CRLF


strString1 = "GLAVINE"
strString2 = "GLAWYN"
intMode = 3
strGuthMatch2 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    61 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch2 : @CRLF : @CRLF


strString1 = "DeLedes"
strString2 = "Dalitz"
intMode = 3
strGuthMatch3 = udfStrGuthMatch (strString1, strString2, intMode)  ; @FALSE   30 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch3 : @CRLF : @CRLF


strString1 = "Darlitz"
strString2 = "Dalitz"
intMode = 3
strGuthMatch4 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    92 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch4 : @CRLF : @CRLF


strString1 = "Dahlicz"
strString2 = "Dalitz"
intMode = 3
strGuthMatch5 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    76 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch5 : @CRLF : @CRLF


strString1 = "Dahley"
strString2 = "Dalitz"
intMode = 3
strGuthMatch6 = udfStrGuthMatch (strString1, strString2, intMode)  ; @FALSE   54 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch6 : @CRLF : @CRLF


strString1 = "Dallwitz"
strString2 = "Dalitz"
intMode = 3
strGuthMatch7 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    85 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch7 : @CRLF : @CRLF


strString1 = "Duhlitz"
strString2 = "Dalitz"
intMode = 3
strGuthMatch8 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    76 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch8 : @CRLF : @CRLF


strString1 = "Talfritz"
strString2 = "Dalitz"
intMode = 3
strGuthMatch9 = udfStrGuthMatch (strString1, strString2, intMode)  ; @TRUE    71 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch9 : @CRLF : @CRLF


strString1 = "FORTRAN"
strString2 = "BASIC"
intMode = 3
strGuthMatch10 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE    0 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch10 : @CRLF : @CRLF


strString1 = "Alex"
strString2 = "Alexander"
intMode = 3
strGuthMatch11 = udfStrGuthMatch (strString1, strString2, intMode) ; @FALSE   61 pct.
strOut = strOut : @CRLF : strString1 : @CRLF : strString2 : @CRLF : strGuthMatch11 : @CRLF : @CRLF


strFileOut = ShortCutDir ("Local Settings") : "\Temp\" : StrInsert (StrReplace (TimeYmdHms (), ":", ""), ".", "", 9, 1) : ".FileOut.txt"
FilePut (strFileOut, strOut)
Run (strFileOut, "")

Exit
;------------------------------------------------------------------------------------------------------------------------------------------