; udfFileOneWordPerLine, ex. 2
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileOneWordPerLine (strFilenameIn, strFilenameOut)
intBBSize = FileSize (strFilenameIn)
If intBBSize == 0 Then Return 0 ; Nothing to do.

; Load the cached 256 byte translation table, or build it and cache it next to the script.
intSizeXLate = 256
hdlBBXlate = BinaryAlloc (intSizeXLate)
strFilenameXlate = StrCat (DirScript (), "MyXlate.bin")
If FileSize (strFilenameXlate) == intSizeXLate
   BinaryRead (hdlBBXlate, strFilenameXlate) ; Load Xlate table.
Else
   GoSub PokeTable
   BinaryWrite (hdlBBXlate, strFilenameXlate) ; Write Xlate table.
EndIf

; Pass 1: Map every non-word byte to a blank and collapse runs of blanks into a single blank.
hdlBB = BinaryAlloc (intBBSize)
BinaryRead (hdlBB, strFilenameIn)
BinaryConvert (hdlBB, 0, 1, 0, 0) ; From ANSI to OEM, because my table is designed for ASCII chars.
BinaryXlate (hdlBB, hdlBBXlate, 0) ; Apply Xlate table.
hdlBBXlate = BinaryFree (hdlBBXlate)
BinaryConvert (hdlBB, 1, 0, 0, 0) ; From OEM to ANSI.
While BinaryReplace (hdlBB, "  ", " ", @TRUE)
EndWhile
BinaryWrite (hdlBB, strFilenameOut)
; Remember how many bytes were really written. After collapsing the blanks this is
; usually less than the size of the input file, so intBBSize must not be used for reading back.
intCleanSize = BinaryEodGet (hdlBB)
intBlankCount = BinaryStrCnt (hdlBB, 0, intCleanSize - 1, " ")
hdlBB = BinaryFree (hdlBB)

; Pass 2: Turn each blank into @CRLF.
; Buffer size: data + one extra byte per blank (" " becomes @CRLF) + leading @LF + trailing @CR + final @LF.
hdlBB = BinaryAlloc (intCleanSize + intBlankCount + 2 + 1)
BinaryReadEx (hdlBB, 1, strFilenameOut, 0, intCleanSize)
BinaryPokeStr (hdlBB, 0, @LF) ; Helper, so that each line is framed as @LF...@CR.
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @CR) ; Helper, see above.
BinaryReplace (hdlBB, " ", @CRLF, @TRUE)
; Delete all blank lines.
sSearch = StrCat (@LF, @CR)
While BinaryReplace (hdlBB, sSearch, "", @TRUE)
EndWhile
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @LF) ; Complete the @CRLF of the last line.
BinaryWriteEx (hdlBB, 0, strFilenameOut, 0, -1) ; Set Filesize to zero.
; Skip the leading helper @LF when writing. If the input had no word characters at all,
; only that helper byte is left and there is nothing to write.
intWordBytes = BinaryEodGet (hdlBB) - 1
If intWordBytes > 0 Then BinaryWriteEx (hdlBB, 1, strFilenameOut, 0, intWordBytes)
hdlBB = BinaryFree (hdlBB)
Return FileSize (strFilenameOut)

;..........................................................................................................................................
:PokeTable
; Fills hdlBBXlate with the 256 target codes listed below (one row per high nibble, one column per low nibble).
; Code 032 (blank) marks a byte that is not part of a word.
;--- Translation Table ASCII -----------------------------------------------------------------------
;Codes  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
sRow0 ="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x0- ; "                "
sRow1 ="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x1- ; "                "
sRow2 ="032 032 032 035 036 037 038 032 032 032 032 032 032 045 032 032" ; 0x2- ; "   #$ &      -  " ; [pct]=037
sRow3 ="048 049 050 051 052 053 054 055 056 057 032 032 032 032 032 032" ; 0x3- ; "0123456789      "
sRow4 ="064 065 066 067 068 069 070 071 072 073 074 075 076 077 078 079" ; 0x4- ; "@ABCDEFGHIJKLMNO"
sRow5 ="080 081 082 083 084 085 086 087 088 089 090 032 032 032 032 095" ; 0x5- ; "PQRSTUVWXYZ    _"
sRow6 ="032 097 098 099 100 101 102 103 104 105 106 107 108 109 110 111" ; 0x6- ; " abcdefghijklmno"
sRow7 ="112 113 114 115 116 117 118 119 120 121 122 032 032 032 032 032" ; 0x7- ; "pqrstuvwxyz     "
sRow8 ="128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143" ; 0x8- ; "ÇüéâäàåçêëèïîìÄÅ"
sRow9 ="144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159" ; 0x9- ; "ÉæÆôöòûùÿÖÜø£Ø׃"
sRow10="160 161 162 163 164 165 166 167 032 032 032 032 032 032 032 032" ; 0xA- ; "áíóúñѪº        "
sRow11="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xB- ; "                "
sRow12="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xC- ; "                "
sRow13="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xD- ; "                "
sRow14="224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239" ; 0xE- ; "ÓßÔÒõÕµþÞÚÛÙýݯ´"
sRow15="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xF- ; "                "
;--- Translation Table ASCII -----------------------------------------------------------------------
For intRow = 0 To 15
   For intCol = 0 To 15
      BinaryPoke (hdlBBXlate, intRow * 16 + intCol, ItemExtract (intCol + 1, sRow%intRow%, " "))
   Next
   Drop (sRow%intRow%)
Next
Return ; from GoSub PokeTable
;..........................................................................................................................................
; This UDF "udfFileOneWordPerLine" creates a file with a single word on each line from a given textfile.
; This function can be used as a first step for creating an indexlist or "Konkordanz" (concordance).
; Note: There are still duplicate entries which have to be removed afterwards.
;
; Parameter:
; strFilenameIn .............. The input textfile.
; strFilenameOut ............. The output textfile, one word per line, each line terminated by @CRLF.
;
; Return value ............... Size of the output file in bytes, or 0 if the input file is empty.
;
; Side effect: The translation table is cached as "MyXlate.bin" in the script folder.
;
; Detlev Dalitz.20010101.20030702.20090425.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------

;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfBinaryDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines)
intSizeIn = FileSize (strFilenameIn)
If intSizeIn == 0 Then Return @FALSE ; Nothing to do.

; Create filename for output file if parameter strFilenameOut is empty.
If strFilenameOut == "" Then strFilenameOut = ItemReplace (StrCat ("deduped.", ItemExtract (-1, strFilenameIn, ".")), -1, strFilenameIn, ".")

; Load the file framed by a leading @LF and a trailing @CR,
; so that every line, including the first and the last one, shows up as @LF...@CR.
hdlBB = BinaryAlloc (2 + intSizeIn)
BinaryPokeStr (hdlBB, 0, @LF) ; A little helper.
BinaryReadEx (hdlBB, 1, strFilenameIn, 0, intSizeIn)
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @CR) ; A little helper.

blnDupYes = strFilenameDup != ""

; Trim blanks first. A line that consists of blanks only becomes an empty line here
; and must be caught by the empty line removal that follows.
If !!blnTrimBlanks
   ; Delete trailing blanks.
   strSearch = StrCat (" ", @CR)
   While BinaryReplace (hdlBB, strSearch, @CR, @FALSE)
   EndWhile
   ; Delete leading blanks.
   strSearch = StrCat (@LF, " ")
   While BinaryReplace (hdlBB, strSearch, @LF, @FALSE)
   EndWhile
EndIf

; Delete all blank lines (an empty line is an @LF immediately followed by @CR).
If !!blnTrimEmptyLines
   strSearch = StrCat (@LF, @CR)
   While BinaryReplace (hdlBB, strSearch, "", @FALSE)
   EndWhile
EndIf

If blnDupYes Then hdlFDup = FileOpen (strFilenameDup, "WRITE")
hdlFOut = FileOpen (strFilenameOut, "WRITE")
While @TRUE
   ; Always pick the first line that is still in the buffer.
   strBBTag = BinaryTagInit (hdlBB, @LF, @CR)
   strBBTag = BinaryTagFind (strBBTag)
   If strBBTag == "" Then Break ; Buffer is used up.
   strLine = BinaryTagExtr (strBBTag, 0)
   FileWrite (hdlFOut, strLine)
   If strLine == ""
      ; Empty lines are passed through one by one, they are not treated as duplicates.
      strBBTag = BinaryTagRepl (strBBTag, "")
      Continue
   EndIf
   ; Remove this line and all of its repetitions from the buffer, the return value is the number of occurrences.
   intDupCount = BinaryReplace (hdlBB, StrCat (@LF, strLine, @CR), "", @FALSE)
   If blnDupYes
      ; Log each additional occurrence (second, third, ...) to the duplicates file.
      For intDup = 2 To intDupCount
         FileWrite (hdlFDup, strLine)
      Next
   EndIf
EndWhile

hdlBB = BinaryFree (hdlBB)
hdlFOut = FileClose (hdlFOut)
If blnDupYes Then hdlFDup = FileClose (hdlFDup)

Return @TRUE
;..........................................................................................................................................
; This UDF "udfBinaryDelDupLines" removes duplicate lines from a textfile using WinBatch binary functions.
; The input file strFilenameIn must be a textfile with @CRLF as eol delimiter.
; The input file need not be sorted.
;
; If parameter "strFilenameOut" is an empty string then the function uses the parameter "strFilenameIn"
; to create a similar output filename with "deduped." inserted in front of the last dot-separated part,
; e.g. "name.txt" becomes "name.deduped.txt".
;
; If parameter "strFilenameDup" is not an empty string then every repeated occurrence of a line
; is written to that file.
;
; If blnTrimBlanks is @TRUE or 1 then leading and trailing blanks will be deleted,
; If blnTrimBlanks is @FALSE or 0 then leading and trailing blanks will not be touched.
;
; If blnTrimEmptyLines is @TRUE or 1 then empty lines will be deleted
; (including lines that became empty by trimming blanks).
; If blnTrimEmptyLines is @FALSE or 0 then empty lines will persist.
;
; NOTE(review): BinaryReplace is called with match-case = @FALSE, so lines which differ only
; in letter case are presumably treated as duplicates - confirm that this is intended.
;
; This UDF "udfBinaryDelDupLines" returns @TRUE on success, and @FALSE if the input file is empty.
;..........................................................................................................................................
; Detlev Dalitz.20020210.20020715.20030703.20090424.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------

;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfBinarySortTextFile (strFilenameIn, strFilenameOut, intDirection, intKeyColumn, intKeySize)
intFileSize = FileSize (strFilenameIn)
If intFileSize == 0 Then Return @FALSE
If intDirection != @ASCENDING Then If intDirection != @DESCENDING Then Return @FALSE
; Create filename for output file if parameter strFilenameOut is empty.
If strFilenameOut == "" Then strFilenameOut = ItemReplace (StrCat ("sorted.", ItemExtract (-1, strFilenameIn, ".")), -1, strFilenameIn, ".")

; Pass 1: Get maximum line size for sort record and count the lines.
intLineSize = 0
intLineCount = 0
IntControl (65, 4096 * 256, 0, 0, 0) ; Enlarge FileRead buffer for speedy access. ; Added DD.20030128
hdlFR = FileOpen (strFilenameIn, "READ")
While @TRUE
   strLine = FileRead (hdlFR)
   If strLine == "*EOF*" Then Break
   intLineSize = Max (intLineSize, StrLen (strLine))
   intLineCount = intLineCount + 1
EndWhile
hdlFR = FileClose (hdlFR)
If intLineCount == 0 Then Return @FALSE ; No records, nothing to sort.

intLineSize = intLineSize + 2 ; Include trailing @CRLF.

; Validate the sort key against the record size before anything is allocated.
; A key that starts behind the record cannot be sorted on, a key that runs past
; the end of the record is cut down to the rest of the record.
intKeyColumn = Max (1, intKeyColumn)
iKeyOffset = intKeyColumn - 1
If iKeyOffset >= intLineSize Then Return @FALSE
intKeySize = Max (0, intKeySize)
If intKeySize == 0 Then intKeySize = intLineSize - iKeyOffset
intKeySize = Min (intKeySize, intLineSize - iKeyOffset)

; Pass 2: Fill the buffer with fixed size records.
; Each record holds the line at its start and @CRLF in its last two bytes,
; the gap between them is left untouched.
hdlBB = BinaryAlloc (intLineCount * intLineSize)
IntControl (65, 4096 * 256, 0, 0, 0) ; Enlarge FileRead buffer for speedy access. ; Added DD.20030128
hdlFR = FileOpen (strFilenameIn, "READ")
intOffset = 0
While @TRUE
   strLine = FileRead (hdlFR)
   If strLine == "*EOF*" Then Break
   BinaryPokeStr (hdlBB, intOffset, strLine)
   intOffset = intOffset + intLineSize
   BinaryPokeStr (hdlBB, intOffset - 2, @CRLF)
EndWhile
hdlFR = FileClose (hdlFR)

; Sort the buffer. Release the buffer if sorting fails, so that the early return does not leak it.
If !BinarySort (hdlBB, intLineSize, iKeyOffset, intKeySize, @STRING|intDirection)
   hdlBB = BinaryFree (hdlBB)
   Return @FALSE
EndIf

; Delete the binary zeroes (the padding between line text and @CRLF).
BinaryReplace (hdlBB, "", "", @FALSE)

; Write the buffer to diskfile.
BinaryWrite (hdlBB, strFilenameOut)
hdlBB = BinaryFree (hdlBB)

Return FileExist (strFilenameOut)
;..........................................................................................................................................
; This UDF "udfBinarySortTextFile" sorts an input textfile
; in ascending or descending order by using WinBatch Binary Functions.
; If output filename is omitted then the function uses the input filename
; to create a similar output filename with "sorted." inserted in front of the
; last dot-separated part, e.g. "name.txt" becomes "name.sorted.txt".
;
; Parameter:
; strFilenameIn .............. The input textfile.
; strFilenameOut ............. The output textfile.
; intDirection=@ASCENDING .... Sort order alphabetic ascending.
; intDirection=@DESCENDING ... Sort order alphabetic descending.
; intKeyColumn ............... Start column of the sortkey, one based (first char=first column).
;                              Values less than 1 are treated as 1.
; intKeySize ................. Length of the sortkey. Zero or less means "up to the end of the record",
;                              a larger value than the rest of the record is cut down accordingly.
;
; Return value ............... @TRUE if all was ok resp. @FALSE if something was wrong
;                              (empty input file, invalid direction, key column behind the longest line,
;                              or sort failure).
;..........................................................................................................................................
; Detlev Dalitz.20010709.20020708.20030128.20090424.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

; Demo run: the script feeds its own source text through the three UDFs.
strScriptFile = IntControl (1004, 0, 0, 0, 0) ; This script itself serves as test input.
strWordFile = FileCreateTemp ("TMP") ; Scratch file for the word list.
strRepeatFile = FileCreateTemp ("DUP") ; Scratch file for the repeated words.
strViewer = "notepad.exe"

; Step 1: One word per line, in order of original occurrence.
intWordBytes = udfFileOneWordPerLine (strScriptFile, strWordFile)
;RunWait (strViewer, strWordFile) ; Optional: inspect the intermediate result.

; Step 2: Sort the word list in place, ascending, whole line as key.
blnSorted = udfBinarySortTextFile (strWordFile, strWordFile, @ASCENDING, 0, 0)
;RunWait (strViewer, strWordFile) ; Optional: inspect the intermediate result.

; Step 3: Condense the word list in place to unique words, collect the repetitions in the scratch file.
blnDeduped = udfBinaryDelDupLines (strWordFile, strWordFile, strRepeatFile, @TRUE, @TRUE)
RunWait (strViewer, strWordFile) ; Blocks until notepad is closed.

; Step 4: Condense the repetitions as well, which leaves each word that occurred more than once.
blnDeduped = udfBinaryDelDupLines (strRepeatFile, strRepeatFile, "", @TRUE, @TRUE)
RunWait (strViewer, strRepeatFile) ; Blocks until notepad is closed.

; Remove the scratch files.
FileDelete (strWordFile)
FileDelete (strRepeatFile)
Exit
;------------------------------------------------------------------------------------------------------------------------------------------