;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileDelDupLinesV2 (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines)
intSizeIn = FileSize (strFilenameIn)
If intSizeIn == 0 Then Return @FALSE ; Nothing to do.

; Create filename for output file if parameter strFilenameOut is empty.
; The last "."-delimited item (the extension) is replaced by "deduped.<extension>".
If strFilenameOut == "" Then strFilenameOut = ItemReplace ("deduped." : ItemExtract (-1, strFilenameIn, "."), -1, strFilenameIn, ".")

; Load the whole file into a buffer with room for two sentinel bytes.
hdlBB = BinaryAlloc (2 + intSizeIn)
BinaryPokeStr (hdlBB, 0, @LF) ; Leading sentinel, so that the first line is enclosed by @LF ... @CR like every other line.
BinaryReadEx (hdlBB, 1, strFilenameIn, 0, intSizeIn)

; Trailing sentinel: only needed if the last line is not terminated by @CRLF.
; Appending @CR behind an already existing final @CRLF would create a phantom empty line at the end of the output.
; The leading sentinel guarantees that the end-of-data offset is at least 2, so both peeks stay inside the buffer.
intEod = BinaryEodGet (hdlBB)
blnEndsWithCrLf = (BinaryPeek (hdlBB, intEod - 2) == 13) && (BinaryPeek (hdlBB, intEod - 1) == 10)
If !blnEndsWithCrLf Then BinaryPokeStr (hdlBB, intEod, @CR)

blnDupYes = strFilenameDup != ""

; Trim blanks BEFORE deleting empty lines, so that lines which consist of blanks only
; become empty first and can be removed by the empty line deletion below.
If !!blnTrimBlanks
   ; Delete trailing blanks. Each pass removes one blank in front of every @CR, repeat until nothing is left to replace.
   strSearch = " " : @CR
   While BinaryReplace (hdlBB, strSearch, @CR, @FALSE)
   EndWhile
   ; Delete leading blanks. Each pass removes one blank behind every @LF.
   strSearch = @LF : " "
   While BinaryReplace (hdlBB, strSearch, @LF, @FALSE)
   EndWhile
EndIf

; Delete all empty lines. An empty line is an @LF (end of the previous line) directly followed by a @CR.
If !!blnTrimEmptyLines Then BinaryReplace (hdlBB, @LF : @CR, "", @FALSE)

If blnDupYes Then hdlFDup = FileOpen (strFilenameDup, "WRITE")
hdlFOut = FileOpen (strFilenameOut, "WRITE")

blnDupFound = @FALSE

; Each pass takes the first remaining line from the buffer, writes it to the output file
; and removes the line together with all of its repetitions from the buffer.
While @TRUE
   strBBTag = BinaryTagInit (hdlBB, @LF, @CR)
   strBBTag = BinaryTagFind (strBBTag)
   If strBBTag == "" Then Break ; No more lines in the buffer.

   strLine = BinaryTagExtr (strBBTag, 0)
   FileWrite (hdlFOut, strLine)

   ; Empty lines are never treated as duplicates, they are removed from the buffer one by one.
   If strLine == ""
      strBBTag = BinaryTagRepl (strBBTag, "")
      Continue
   EndIf

   ; Remove every occurrence of this line. The comparison is not case sensitive (match-case parameter is @FALSE).
   intDupCount = BinaryReplace (hdlBB, @LF : strLine : @CR, "", @FALSE)

   ; Safety net: if nothing was removed the buffer would never shrink and the loop would never end.
   ; NOTE(review): presumably only possible for non-text data, e.g. embedded NUL bytes - confirm.
   If intDupCount == 0 Then strBBTag = BinaryTagRepl (strBBTag, "")

   If intDupCount > 1 Then blnDupFound = @TRUE

   ; Write each repetition (all occurrences except the first one) to the duplicates file.
   If blnDupYes
      For intDup = 2 To intDupCount
         FileWrite (hdlFDup, strLine)
      Next
   EndIf
EndWhile

hdlBB = BinaryFree (hdlBB)
hdlFOut = FileClose (hdlFOut)
If blnDupYes Then hdlFDup = FileClose (hdlFDup)

; Report what has been found, not a file size comparison, because trimming alone makes the output smaller too.
Return blnDupFound
;..........................................................................................................................................
; This UDF "udfFileDelDupLinesV2" removes duplicate lines from a textfile using WinBatch binary functions.
; The input file strFilenameIn must be a textfile with @CRLF as eol delimiter.
; The input file does not need to be sorted.
; The first occurrence of each line is kept, the order of the remaining lines is preserved.
; Lines are compared without regard to upper or lower case.
; Empty lines are never treated as duplicates of each other.
;
; If parameter "strFilenameOut" is an empty string then the function uses the parameter "strFilenameIn"
; to create a similar output filename with "deduped." inserted in front of the last extension,
; e.g. "data.txt" becomes "data.deduped.txt".
;
; If parameter "strFilenameDup" is not an empty string then each removed repetition of a line
; is written to this file, otherwise no duplicates file is created.
;
; If blnTrimBlanks is @TRUE or 1 then leading and trailing blanks will be deleted before the lines are compared.
; If blnTrimBlanks is @FALSE or 0 then leading and trailing blanks will not be touched.
;
; If blnTrimEmptyLines is @TRUE or 1 then empty lines will be deleted,
; together with blnTrimBlanks this includes lines which consist of blanks only.
; If blnTrimEmptyLines is @FALSE or 0 then empty lines will persist.
;
; If input filesize is zero, then the function returns @FALSE immediately.
; On success the function returns @TRUE to indicate that there were duplicate lines detected, otherwise @FALSE.
;..........................................................................................................................................
; Detlev Dalitz.20020210.20020715.20030703.20090426.20090716.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------

; Test.

; Setup test environment.
; Create the three temporary work files: input, deduplicated output and duplicates list.
strFilenameIn = FileCreateTemp ("IN")
strFilenameOut = FileCreateTemp ("OUT")
strFilenameDup = FileCreateTemp ("DUP")

; Fill the input file with sample lines: unique lines, repeated lines, empty lines
; and lines with leading and/or trailing blanks.
hdlSample = FileOpen (strFilenameIn, "WRITE")
FileWrite (hdlSample, " ***FILEA UNIQUE***")
FileWrite (hdlSample, " ***COMMON*** ")
FileWrite (hdlSample, "")
FileWrite (hdlSample, " ***File A DUP***")
FileWrite (hdlSample, " ***COMMON DUP***")
FileWrite (hdlSample, " ***File A DUP***")
FileWrite (hdlSample, " ***COMMON DUP***")
FileWrite (hdlSample, "")
FileWrite (hdlSample, " ***File A 1*** ")
FileWrite (hdlSample, " ***File X 1*** ")
FileWrite (hdlSample, " ***File A 2*** ")
FileWrite (hdlSample, " ***File X 2*** ")
FileWrite (hdlSample, " ***File A 2*** ")
FileWrite (hdlSample, " ***File X 2*** ")
FileWrite (hdlSample, " ***File A 3*** ")
FileWrite (hdlSample, " ***File X 3*** ")
FileWrite (hdlSample, " ***File A 3*** ")
FileWrite (hdlSample, " ***File X 3*** ")
FileWrite (hdlSample, " ***File A 3*** ")
FileWrite (hdlSample, "***File X 3*** ")
hdlSample = FileClose (hdlSample)

; Ask for a testcase again and again.
; NOTE(review): the loop has no exit of its own; presumably cancelling the dialog transfers control to the label CANCEL below - confirm.
While @TRUE
   strChoice = AskItemlist ("Choose a testcase", "1,2,3,4", ",", @UNSORTED, @SINGLE)
   intTest = Int ("0" : strChoice) ; An empty selection evaluates to testcase 0, which runs nothing.

   Switch intTest
   Case 1
      ; No trimming at all, with duplicates file.
      blnResult1 = udfFileDelDupLinesV2 (strFilenameIn, strFilenameOut, strFilenameDup, @FALSE, @FALSE)
      If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
      If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
      If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)
      Break
   Case 2
      ; Trim blanks, keep empty lines, with duplicates file.
      blnResult2 = udfFileDelDupLinesV2 (strFilenameIn, strFilenameOut, strFilenameDup, @TRUE, @FALSE)
      If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
      If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
      If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)
      Break
   Case 3
      ; Trim blanks and delete empty lines, with duplicates file.
      blnResult3 = udfFileDelDupLinesV2 (strFilenameIn, strFilenameOut, strFilenameDup, @TRUE, @TRUE)
      If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
      If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
      If FileExist (strFilenameDup) == 1 Then RunWait ("notepad.exe", strFilenameDup)
      Break
   Case 4
      ; No trimming, no duplicates file requested; only input and output are shown.
      blnResult4 = udfFileDelDupLinesV2 (strFilenameIn, strFilenameOut, "", @FALSE, @FALSE)
      If FileExist (strFilenameIn) == 1 Then RunWait ("notepad.exe", strFilenameIn)
      If FileExist (strFilenameOut) == 1 Then RunWait ("notepad.exe", strFilenameOut)
      Break
   EndSwitch

   ; Remove the result files before the next testcase is chosen.
   FileDelete (strFilenameOut)
   FileDelete (strFilenameDup)
EndWhile

:CANCEL
; Final cleanup of all temporary files.
FileDelete (strFilenameIn)
FileDelete (strFilenameOut)
FileDelete (strFilenameDup)
Exit
;------------------------------------------------------------------------------------------------------------------------------------------