;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines)
; Removes duplicate lines from a textfile; see the description block at the end of this function.
If FileSize (strFilenameIn) == 0 Then Return @FALSE ; Nothing to do.

; Create filename for output file if parameter strFilenameOut is empty ("name.ext" becomes "name.deduped.ext").
If strFilenameOut == "" Then strFilenameOut = ItemReplace ("deduped." : ItemExtract (-1, strFilenameIn, "."), -1, strFilenameIn, ".")

; Normalize the switches to strict 0/1 values.
blnDupYes = strFilenameDup != ""
blnTrimBlanks = !!blnTrimBlanks
blnTrimEmptyLines = !!blnTrimEmptyLines

strItemList = FileGet (strFilenameIn)
; Removing every LF+CR pair collapses any run of consecutive CRLF line ends into a single CRLF.
If blnTrimEmptyLines Then strItemList = StrReplace (strItemList, @LF : @CR, "") ; Delete all empty lines.
strItemList = StrReplace (strItemList, @CRLF, @LF)

strTrimList = ""
strDupList = ""
blnDupFound = @FALSE ; Becomes @TRUE as soon as a line is seen for the second time.
intCount = ItemCount (strItemList, @LF)
For intItem = 1 To intCount
   strItem = ItemExtract (intItem, strItemList, @LF)
   If blnTrimBlanks Then strItem = StrTrim (strItem)
   If strItem == ""
      If blnTrimEmptyLines Then Continue
      ; A kept empty line is copied as it is and must bypass the duplicate check below,
      ; otherwise it would be inserted a second time or be reported as a duplicate.
      strTrimList = ItemInsert (strItem, -1, strTrimList, @LF)
      Continue
   EndIf
   If ItemLocate (strItem, strTrimList, @LF)
      blnDupFound = @TRUE
      If blnDupYes Then strDupList = ItemInsert (strItem, -1, strDupList, @LF)
   Else
      strTrimList = ItemInsert (strItem, -1, strTrimList, @LF)
   EndIf
Next

; The duplicates file is written only if requested and if there is something to write.
If strDupList != ""
   strDupList = StrReplace (strDupList, @LF, @CRLF)
   intResult = FilePut (strFilenameDup, strDupList)
EndIf

; The output file is written only if at least one line is left over.
If strTrimList != ""
   strTrimList = StrReplace (strTrimList, @LF, @CRLF)
   intResult = FilePut (strFilenameOut, strTrimList)
EndIf

; Return the explicit flag instead of comparing file sizes: the output also shrinks when only blanks
; or empty lines were trimmed, and the output file does not exist at all if no line was left over.
Return blnDupFound
;..........................................................................................................................................
; This UDF "udfFileDelDupLines" removes duplicate lines from a textfile.
; The input file strFilenameIn must be a textfile with @CRLF as eol delimiter.
; The input file need not be sorted.
;
; If parameter "strFilenameOut" is an empty string then the function uses the parameter "strFilenameIn"
; to create a similar output filename with "deduped" inserted in front of the extension ("name.ext" ==> "name.deduped.ext").
;
; If parameter "strFilenameDup" is not an empty string then each detected duplicate line will be written to this file.
;
; If blnTrimBlanks is @TRUE or 1 then leading and trailing blanks will be deleted,
; If blnTrimBlanks is @FALSE or 0 then leading and trailing blanks will not be touched.
;
; If blnTrimEmptyLines is @TRUE or 1 then empty lines will be deleted.
; If blnTrimEmptyLines is @FALSE or 0 then empty lines will persist.
;
; If input filesize is zero, then the function returns @FALSE immediately.
; On success the function returns @TRUE to indicate that there were duplicate lines detected, otherwise @FALSE.
;..........................................................................................................................................
; Detlev Dalitz.20030705.20090426.20090716.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

strFilenameIn = IntControl (1004, 0, 0, 0, 0) ; We use this file as test input.
strFilenameTmp = FileCreateTemp ("TMP")


:Test1
; Trim blanks and delete empty lines, no duplicates file.
strFilenameOut = strFilenameTmp : ".trim.1.txt"
strFilenameDup = ""
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
blnResult1 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)


:Test2
; Keep blanks, delete empty lines, no duplicates file.
strFilenameOut = strFilenameTmp : ".trim.2.txt"
strFilenameDup = ""
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @TRUE
blnResult2 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)


:Test3
; Trim blanks, keep empty lines, no duplicates file.
strFilenameOut = strFilenameTmp : ".trim.3.txt"
strFilenameDup = ""
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @FALSE
blnResult3 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)


:Test4
; Keep blanks and keep empty lines, no duplicates file.
strFilenameOut = strFilenameTmp : ".trim.4.txt"
strFilenameDup = ""
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @FALSE
blnResult4 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)


:Test5
; Keep blanks, delete empty lines, collect the duplicates in a separate file.
strFilenameOut = strFilenameTmp : ".trim.5.txt"
strFilenameDup = strFilenameTmp : ".dupl.5.txt"
blnTrimBlanks = @FALSE
blnTrimEmptyLines = @TRUE
blnResult5 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)


:Test6
; Use the duplicates file of Test5 as input.
strFilenameIn = strFilenameTmp : ".dupl.5.txt"
strFilenameOut = strFilenameTmp : ".trim.6.txt"
strFilenameDup = strFilenameTmp : ".dupl.6.txt"
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
blnResult6 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @TRUE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)


:Test7
; Use the deduped output of Test6 as input and let the function create the output filename.
strFilenameIn = strFilenameTmp : ".trim.6.txt"
strFilenameOut = "" ; strFilename : ".trim.7.txt"
strFilenameDup = strFilenameTmp : ".dupl.7.txt"
blnTrimBlanks = @TRUE
blnTrimEmptyLines = @TRUE
blnResult7 = udfFileDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines) ; @FALSE.
If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)


:Cleaning
; Delete the temporary file and all test files derived from its name.
; The file mask is enclosed in double quotes so that DEL also works if the temp folder path contains blanks.
strFileMask = ItemReplace ("*", -1, FileFullname (strFilenameTmp), ".")
blnResult = RunHide (Environment ("COMSPEC"), '/cDEL "' : strFileMask : '"')

Exit
;------------------------------------------------------------------------------------------------------------------------------------------