;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileOneWordPerLine (strFilenameIn, strFilenameOut)
intBBSize = FileSize (strFilenameIn)
If intBBSize == 0 Then Return 0

; Load the 256 byte translation table from the cache file beside the script,
; or build it once and write the cache file.
intSizeXLate = 256
hdlBBXlate = BinaryAlloc (intSizeXLate)
strFilenameXlate = StrCat (DirScript (), "MyXlate.bin")
If FileSize (strFilenameXlate) == intSizeXLate
   BinaryRead (hdlBBXlate, strFilenameXlate) ; Load Xlate table.
Else
   GoSub PokeTable
   BinaryWrite (hdlBBXlate, strFilenameXlate) ; Write Xlate table.
EndIf

; Transforming: every byte that is not part of a word becomes a blank.
hdlBB = BinaryAlloc (intBBSize)
BinaryRead (hdlBB, strFilenameIn)
BinaryConvert (hdlBB, 0, 1, 0, 0) ; From ANSI to OEM, because the table is designed for OEM/ASCII chars.
BinaryXlate (hdlBB, hdlBBXlate, 0) ; Apply Xlate table.
hdlBBXlate = BinaryFree (hdlBBXlate)
BinaryConvert (hdlBB, 1, 0, 0, 0) ; From OEM to ANSI.

; Collapse runs of blanks, so that the second buffer needs less extra space.
While BinaryReplace (hdlBB, "  ", " ", @TRUE)
EndWhile

; Park the condensed data in the output file, because a binary buffer cannot grow in place.
BinaryWrite (hdlBB, strFilenameOut)
intBBSize = BinaryEodGet (hdlBB) ; Size of the condensed data, this is what has to be read back.
intBlankCount = BinaryStrCnt (hdlBB, 0, intBBSize - 1, " ")
BinaryFree (hdlBB)

; New buffer: data + one extra byte per blank (blank becomes @CRLF) + leading @LF + trailing @CR + final @LF.
hdlBB = BinaryAlloc (intBBSize + intBlankCount + 2 + 1)
BinaryReadEx (hdlBB, 1, strFilenameOut, 0, intBBSize)
BinaryPokeStr (hdlBB, 0, @LF) ; Helper, so that every line is framed by @LF ... @CR.
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @CR) ; Helper.
BinaryReplace (hdlBB, " ", @CRLF, @TRUE) ; Each blank ends a word, so each blank becomes a line break.

; Delete all blank lines.
sSearch = StrCat (@LF, @CR)
While BinaryReplace (hdlBB, sSearch, "", @TRUE)
EndWhile

; Complete the last line and write the buffer without the leading helper @LF.
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @LF)
BinaryWriteEx (hdlBB, 0, strFilenameOut, 0, -1) ; Set Filesize to zero.
BinaryWriteEx (hdlBB, 1, strFilenameOut, 0, BinaryEodGet (hdlBB) - 1)
hdlBB = BinaryFree (hdlBB)
Return FileSize (strFilenameOut)
;..........................................................................................................................................

:PokeTable
; Fills hdlBBXlate with the table below. Each row holds 16 decimal byte values separated by one blank.
; Value 032 means: this character is not part of a word and becomes a blank.
;--- Translation Table ASCII -----------------------------------------------------------------------
;Codes   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
sRow0  = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x0- ; all blank
sRow1  = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x1- ; all blank
sRow2  = "032 032 032 035 036 037 038 032 032 032 032 032 032 045 032 032" ; 0x2- ; keeps # $ [pct] & - ; [pct]=037
sRow3  = "048 049 050 051 052 053 054 055 056 057 032 032 032 032 032 032" ; 0x3- ; "0123456789      "
sRow4  = "064 065 066 067 068 069 070 071 072 073 074 075 076 077 078 079" ; 0x4- ; "@ABCDEFGHIJKLMNO"
sRow5  = "080 081 082 083 084 085 086 087 088 089 090 032 032 032 032 095" ; 0x5- ; "PQRSTUVWXYZ    _"
sRow6  = "032 097 098 099 100 101 102 103 104 105 106 107 108 109 110 111" ; 0x6- ; " abcdefghijklmno"
sRow7  = "112 113 114 115 116 117 118 119 120 121 122 032 032 032 032 032" ; 0x7- ; "pqrstuvwxyz     "
sRow8  = "128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143" ; 0x8- ; "ÇüéâäàåçêëèïîìÄÅ"
sRow9  = "144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159" ; 0x9- ; "ÉæÆôöòûùÿÖÜø£Ø×" plus code 159
sRow10 = "160 161 162 163 164 165 166 167 032 032 032 032 032 032 032 032" ; 0xA- ; "áíóúñѪº        "
sRow11 = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xB- ; all blank
sRow12 = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xC- ; all blank
sRow13 = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xD- ; all blank
sRow14 = "224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239" ; 0xE- ; "ÓßÔÒõÕµþÞÚÛÙýݯ´"
sRow15 = "032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xF- ; all blank
;--- Translation Table ASCII -----------------------------------------------------------------------
For intRow = 0 To 15
   For intCol = 0 To 15
      BinaryPoke (hdlBBXlate, intRow * 16 + intCol, ItemExtract (intCol + 1, sRow%intRow%, " "))
   Next
   Drop (sRow%intRow%)
Next
Return ; from GoSub PokeTable
;..........................................................................................................................................
; This UDF "udfFileOneWordPerLine" creates a file with a single word on each line from a given textfile.
; This function can be used as a first step for creating an index list or concordance.
; Note: There are still duplicate entries which have to be removed afterwards.
;
; Parameter:
; strFilenameIn .... The input textfile.
; strFilenameOut ... The output textfile, it is also used as intermediate storage while the function runs.
;
; Return value ..... Size of the output file in bytes, or 0 if the input file is empty (no output is written then).
;
; Side effect ...... Creates the cache file "MyXlate.bin" in the script folder if it does not exist with 256 bytes.
;
; Detlev Dalitz.20010101.20030702.20090425.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfBinaryDelDupLines (strFilenameIn, strFilenameOut, strFilenameDup, blnTrimBlanks, blnTrimEmptyLines)
intSizeIn = FileSize (strFilenameIn)
If intSizeIn == 0 Then Return @FALSE ; Nothing to do.

; Create filename for output file if parameter strFilenameOut is empty.
If strFilenameOut == "" Then strFilenameOut = ItemReplace (StrCat ("deduped.", ItemExtract (-1, strFilenameIn, ".")), -1, strFilenameIn, ".")

; Frame the file content with a leading @LF and a trailing @CR,
; so that every line, including the first and the last one, appears as @LF line @CR.
hdlBB = BinaryAlloc (2 + intSizeIn)
BinaryPokeStr (hdlBB, 0, @LF) ; A little helper.
BinaryReadEx (hdlBB, 1, strFilenameIn, 0, intSizeIn)
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @CR) ; A little helper.

blnDupYes = strFilenameDup != ""

; Trim blanks first, so that lines which consist of blanks only are empty
; before the empty lines are removed.
If !!blnTrimBlanks
   ; Delete trailing blanks.
   strSearch = StrCat (" ", @CR)
   While BinaryReplace (hdlBB, strSearch, @CR, @FALSE)
   EndWhile
   ; Delete leading blanks.
   strSearch = StrCat (@LF, " ")
   While BinaryReplace (hdlBB, strSearch, @LF, @FALSE)
   EndWhile
EndIf

If !!blnTrimEmptyLines Then BinaryReplace (hdlBB, StrCat (@LF, @CR), "", @FALSE) ; Delete all blank lines.

If blnDupYes Then hdlFDup = FileOpen (strFilenameDup, "WRITE")
hdlFOut = FileOpen (strFilenameOut, "WRITE")

; Each pass takes the first remaining line, writes it to the output file
; and removes all of its occurrences from the buffer, until the buffer has no line left.
While @TRUE
   strBBTag = BinaryTagInit (hdlBB, @LF, @CR)
   strBBTag = BinaryTagFind (strBBTag)
   If strBBTag == "" Then Break
   strLine = BinaryTagExtr (strBBTag, 0)
   FileWrite (hdlFOut, strLine)
   If strLine == ""
      ; An empty line is kept in the output, only its tag pair is removed from the buffer.
      strBBTag = BinaryTagRepl (strBBTag, "")
      Continue
   EndIf
   ; Not case sensitive: lines which differ only in case count as duplicates.
   intDupCount = BinaryReplace (hdlBB, StrCat (@LF, strLine, @CR), "", @FALSE)
   If blnDupYes
      ; The first occurrence went to the output file, each further occurrence goes to the duplicates file.
      For intDup = 2 To intDupCount
         FileWrite (hdlFDup, strLine)
      Next
   EndIf
EndWhile

hdlBB = BinaryFree (hdlBB)
hdlFOut = FileClose (hdlFOut)
If blnDupYes Then hdlFDup = FileClose (hdlFDup)
Return @TRUE
;..........................................................................................................................................
; This UDF "udfBinaryDelDupLines" removes duplicate lines from a textfile using WinBatch binary functions.
; The input file strFilenameIn must be a textfile with @CRLF as eol delimiter.
; The input file need not be sorted.
;
; If parameter "strFilenameOut" is an empty string then the function uses the parameter "strFilenameIn"
; to create a similar output filename with "deduped." inserted in front of the last dot separated part,
; for example "words.txt" gives "words.deduped.txt".
;
; If parameter "strFilenameDup" is not an empty string then each surplus occurrence of a line
; is written to this file.
;
; If blnTrimBlanks is @TRUE or 1 then leading and trailing blanks will be deleted,
; If blnTrimBlanks is @FALSE or 0 then leading and trailing blanks will not be touched.
;
; If blnTrimEmptyLines is @TRUE or 1 then empty lines will be deleted.
; If blnTrimEmptyLines is @FALSE or 0 then empty lines will persist.
;
; This UDF "udfBinaryDelDupLines" returns @TRUE on success, or @FALSE if the input file is empty.
;..........................................................................................................................................
; Detlev Dalitz.20020210.20020715.20030703.20090424.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfBinarySortTextFile (strFilenameIn, strFilenameOut, intDirection, intKeyColumn, intKeySize)
intFileSize = FileSize (strFilenameIn)
If intFileSize == 0 Then Return @FALSE
If intDirection != @ASCENDING Then If intDirection != @DESCENDING Then Return @FALSE

; Create filename for output file if parameter strFilenameOut is empty.
If strFilenameOut == "" Then strFilenameOut = ItemReplace (StrCat ("sorted.", ItemExtract (-1, strFilenameIn, ".")), -1, strFilenameIn, ".")

; Get maximum line size for sort record.
intLineSize = 0
intLineCount = 0
IntControl (65, 4096 * 256, 0, 0, 0) ; Enlarge FileRead buffer for speedy access. ; Added DD.20030128
hdlFR = FileOpen (strFilenameIn, "READ")
While @TRUE
   strLine = FileRead (hdlFR)
   If strLine == "*EOF*" Then Break
   intLineSize = Max (intLineSize, StrLen (strLine))
   intLineCount = intLineCount + 1
EndWhile
hdlFR = FileClose (hdlFR)

; Fill the buffer with fixed size records, each record ends with @CRLF.
; Unused bytes of a record stay binary zero.
intLineSize = intLineSize + 2 ; Include trailing @CRLF.
hdlBB = BinaryAlloc (intLineCount * intLineSize)
IntControl (65, 4096 * 256, 0, 0, 0) ; Enlarge FileRead buffer for speedy access. ; Added DD.20030128
hdlFR = FileOpen (strFilenameIn, "READ")
intOffset = 0
While @TRUE
   strLine = FileRead (hdlFR)
   If strLine == "*EOF*" Then Break
   BinaryPokeStr (hdlBB, intOffset, strLine)
   intOffset = intOffset + intLineSize
   BinaryPokeStr (hdlBB, intOffset - 2, @CRLF)
EndWhile
hdlFR = FileClose (hdlFR)

; Sort the buffer.
intKeyColumn = Max (1, intKeyColumn)
iKeyOffset = intKeyColumn - 1
If iKeyOffset >= intLineSize
   ; The key column lies behind the end of the record, there is nothing to sort by.
   hdlBB = BinaryFree (hdlBB)
   Return @FALSE
EndIf
intKeySize = Max (0, intKeySize)
If intKeySize == 0 Then intKeySize = intLineSize - iKeyOffset
intKeySize = Min (intKeySize, intLineSize - iKeyOffset) ; The key must not run past the record.
If !BinarySort (hdlBB, intLineSize, iKeyOffset, intKeySize, @STRING | intDirection)
   hdlBB = BinaryFree (hdlBB) ; Do not leak the buffer on failure.
   Return @FALSE
EndIf

; Delete the binary zeroes.
; NOTE(review): this relies on BinaryReplace handling an empty search string as binary zero - confirm.
BinaryReplace (hdlBB, "", "", @FALSE)

; Write the buffer to diskfile.
BinaryWrite (hdlBB, strFilenameOut)
hdlBB = BinaryFree (hdlBB)
Return FileExist (strFilenameOut)
;..........................................................................................................................................
; This UDF "udfBinarySortTextFile" sorts an input textfile
; in ascending or descending order by using WinBatch Binary Functions.
; If output filename is omitted then the function uses the input filename
; to create a similar output filename with "sorted." inserted in front of the last dot separated part,
; for example "words.txt" gives "words.sorted.txt".
;
; Parameter:
; strFilenameIn .............. The input textfile.
; strFilenameOut ............. The output textfile.
; intDirection=@ASCENDING .... Sort order alphabetic ascending.
; intDirection=@DESCENDING ... Sort order alphabetic descending.
; intKeyColumn ............... Start column of the sortkey, one based (first char=first column).
;                              Values below 1 are treated as 1.
; intKeySize ................. Length of the sortkey. Zero or less means: from key column to end of record.
;
; Return value ............... @FALSE if the input file is empty, if intDirection is invalid,
;                              if the key column lies behind the longest line or if the sort fails,
;                              otherwise the result of FileExist for the output file.
;
; Note: A line which consists of the text *EOF* ends the reading, because FileRead returns this text at end of file.
;..........................................................................................................................................
; Detlev Dalitz.20010709.20020708.20030128.20090424.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

strFilenameIn = IntControl (1004, 0, 0, 0, 0) ; We use this script for test input.
strFilenameTmp = FileCreateTemp ("TMP")       ; Temporary file for test output.
strFilenameDup = FileCreateTemp ("DUP")       ; Temporary file for test output.

; Build a file with a single word on each line.
; Words are listed in order of original occurrence.
intResult = udfFileOneWordPerLine (strFilenameIn, strFilenameTmp)
;RunWait ("notepad.exe", strFilenameTmp) ; Wait for closing notepad.

; Build a file with a single word on each line but sorted.
blnResult = udfBinarySortTextFile (strFilenameTmp, strFilenameTmp, @ASCENDING, 0, 0)
;RunWait ("notepad.exe", strFilenameTmp) ; Wait for closing notepad.

; Build a condensed file with a unique word on each line.
blnResult = udfBinaryDelDupLines (strFilenameTmp, strFilenameTmp, strFilenameDup, @TRUE, @TRUE)
RunWait ("notepad.exe", strFilenameTmp) ; Wait for closing notepad.

; Build a file with all those words which occurred more than once in the original file.
blnResult = udfBinaryDelDupLines (strFilenameDup, strFilenameDup, "", @TRUE, @TRUE)
RunWait ("notepad.exe", strFilenameDup) ; Wait for closing notepad.

; Cleaning.
FileDelete (strFilenameTmp)
FileDelete (strFilenameDup)

Exit
;------------------------------------------------------------------------------------------------------------------------------------------