udfFileDelDupLines (1)
bln udfFileDelDupLines (str, str, str, bln, bln)
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines)
; Writes the lines of strFilenameIn to strFilenameOut, keeping only the first occurrence of each line.
; The full description of parameters and return value is in the comment block at the end of this function.
If FileSize (strFilenameIn) == 0 Then Return @FALSE ; Nothing to do.

; Create filename for output file if parameter strFilenameOut is empty.
; The last dot-separated part "ext" of the input filename is replaced by "deduped.ext".
If strFilenameOut == "" Then strFilenameOut = ItemReplace ("deduped." : ItemExtract (-1, strFilenameIn, "."), -1, strFilenameIn, ".")

blnDupYes = strFilenameDup != "" ; Duplicates are collected only if a filename for them was given.
blnTrimBlanks = !!blnTrimBlanks ; Normalize the flags to 0 or 1.
blnTrimEmptyLines = !!blnTrimEmptyLines

strItemList = FileGet (strFilenameIn)
If blnTrimEmptyLines Then strItemList = StrReplace (strItemList, @LF : @CR, "") ; Delete all empty lines (collapses consecutive @CRLF pairs).
strItemList = StrReplace (strItemList, @CRLF, @LF) ; Work with a @LF delimited itemlist.
strTrimList = "" ; Lines for the output file, in order of first occurrence.
strDupList = "" ; Lines which were found again.

intCount = ItemCount (strItemList, @LF)
For intItem = 1 To intCount
   strItem = ItemExtract (intItem, strItemList, @LF)
   If blnTrimBlanks Then strItem = StrTrim (strItem)
   If strItem == ""
      If blnTrimEmptyLines Then Continue
      ; Empty lines persist as they are. They must not run into the duplicate check below,
      ; otherwise a kept empty line would be looked up again and handled a second time.
      strTrimList = ItemInsert (strItem, -1, strTrimList, @LF)
      Continue
   EndIf
   If !ItemLocate (strItem, strTrimList, @LF) Then strTrimList = ItemInsert (strItem, -1, strTrimList, @LF)
      Else If blnDupYes Then strDupList = ItemInsert (strItem, -1, strDupList, @LF)
Next

; The files are written only if there is something to write.
If strDupList != ""
   strDupList = StrReplace (strDupList, @LF, @CRLF)
   FilePut (strFilenameDup, strDupList)
EndIf
If strTrimList != ""
   strTrimList = StrReplace (strTrimList, @LF, @CRLF)
   FilePut (strFilenameOut, strTrimList)
EndIf
; NOTE(review): If strTrimList was empty then no output file was written above, but its size is requested here anyway.
; Confirm what FileSize does for a missing (or a stale, older) output file.
Return FileSize (strFilenameIn) > FileSize (strFilenameOut)
;..........................................................................................................................................
; This UDF "udfFileDelDupLines" removes duplicate lines from a textfile.
; The input file strFilenameIn should be a textfile with @CRLF as eol delimiter.
; The input file need not be sorted. The first occurrence of a line is kept, later occurrences are dropped.
;
; If parameter "strFilenameOut" is an empty string then the function uses the parameter "strFilenameIn"
; to create a similar output filename with "deduped." inserted in front of the last dot-separated part,
; e.g. "data.txt" gives "data.deduped.txt".
;
; If parameter "strFilenameDup" is not an empty string then each non-empty line that was found again
; is collected and written to this file. The file is written only if at least one such line was collected.
;
; If blnTrimBlanks is @TRUE or 1 then each line is passed through StrTrim before it is compared and stored.
; If blnTrimBlanks is @FALSE or 0 then leading and trailing blanks will not be touched.
;
; If blnTrimEmptyLines is @TRUE or 1 then empty lines will be deleted.
; If blnTrimEmptyLines is @FALSE or 0 then empty lines will persist; they are not treated as duplicates.
;
; If input filesize is zero, then the function returns @FALSE immediately.
; Otherwise the function returns @TRUE if the output file is smaller than the input file, otherwise @FALSE.
; Note that the output file also becomes smaller if only blanks or empty lines were removed.
;..........................................................................................................................................
; Detlev Dalitz.20030705.20090426.20090716.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

; The test input is this script file itself.
; The temporary filename serves as the common prefix for all files created by the tests.
strFilenameTmp = FileCreateTemp ("TMP")
strFilenameIn = IntControl (1004, 0, 0, 0, 0)

:Test1
; Trim blanks, delete empty lines, no duplicates file.
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
strFilenameDup = ""
strFilenameOut = strFilenameTmp : ".trim.1.txt"
blnResult1 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf

:Test2
; Keep blanks, delete empty lines, no duplicates file.
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @TRUE
strFilenameDup = ""
strFilenameOut = strFilenameTmp : ".trim.2.txt"
blnResult2 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf

:Test3
; Trim blanks, keep empty lines, no duplicates file.
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @FALSE
strFilenameDup = ""
strFilenameOut = strFilenameTmp : ".trim.3.txt"
blnResult3 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf

:Test4
; Keep blanks, keep empty lines, no duplicates file.
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @FALSE
strFilenameDup = ""
strFilenameOut = strFilenameTmp : ".trim.4.txt"
blnResult4 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf

:Test5
; Keep blanks, delete empty lines, collect the duplicates in a separate file.
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @TRUE
strFilenameDup = strFilenameTmp : ".dupl.5.txt"
strFilenameOut = strFilenameTmp : ".trim.5.txt"
blnResult5 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf
If FileExist (strFilenameDup) == 1
   RunWait ("notepad.exe", strFilenameDup)
EndIf

:Test6
; The duplicates file written by Test5 is the input now; trim blanks and delete empty lines.
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
strFilenameIn = strFilenameTmp : ".dupl.5.txt"
strFilenameDup = strFilenameTmp : ".dupl.6.txt"
strFilenameOut = strFilenameTmp : ".trim.6.txt"
blnResult6 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1
   RunWait ("notepad.exe", strFilenameIn)
EndIf
If FileExist (strFilenameOut) == 1
   RunWait ("notepad.exe", strFilenameOut)
EndIf
If FileExist (strFilenameDup) == 1
   RunWait ("notepad.exe", strFilenameDup)
EndIf

:Test7
; The output file written by Test6 is the input now, so no further duplicates are expected.
strFilenameIn = strFilenameTmp : ".trim.6.txt"
strFilenameOut = "" ; Empty on purpose: the function has to derive the output filename from strFilenameIn.
strFilenameDup = strFilenameTmp : ".dupl.7.txt"
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
blnResult7 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @FALSE.
; The variable strFilenameOut is still empty here, the filename created inside the function does not come back.
; Rebuild the name by the same rule as the function does ("name.ext" gives "name.deduped.ext"),
; otherwise the output file of this test would never be displayed.
strFilenameOut = ItemReplace ("deduped." : ItemExtract (-1, strFilenameIn, "."), -1, strFilenameIn, ".")
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)

:Cleaning
; Delete the temporary file and all test files derived from its name ("<tempname>.*").
; The wildcard path is enclosed in double quotes so that DEL also works if the temp folder name contains blanks.
strFilemaskTmp = ItemReplace ("*", -1, FileFullname (strFilenameTmp), ".")
blnResult = RunHide (Environment ("COMSPEC"), "/cDEL " : '"' : strFilemaskTmp : '"')
Exit
;------------------------------------------------------------------------------------------------------------------------------------------