;------------------------------------------------------------------------------------------------------------------------------------------
; udfFileOneWordPerLine (strFilenameIn, strFilenameOut)
;
; Creates a file with a single word on each line from a given text file.
; Words are listed in order of original occurrence.
; This function can be used as a first step for creating an index list or concordance ("Konkordanz").
; Note: There are still duplicate entries which have to be removed afterwards.
;
; Parameters:
;   strFilenameIn  .. Name of the text file to read.
;   strFilenameOut .. Name of the file that receives the word list (it is overwritten).
;
; Returns:
;   The size of the output file as reported by FileSize, or 0 if the input file size is 0
;   (in that case the output file is not touched).
;
; Side effect:
;   The 256 byte translation table is cached as "MyXlate.bin" in the script folder.
;   If that file does not exist with exactly 256 bytes, it is rebuilt and written again.
;
; Detlev Dalitz.20010101.20030702.20090425.
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfFileOneWordPerLine (strFilenameIn, strFilenameOut)
intBBSize = FileSize (strFilenameIn)
If intBBSize == 0 Then Return 0

; Get the translation table, either from the cache file or by building it from scratch.
intSizeXLate = 256
hdlBBXlate = BinaryAlloc (intSizeXLate)
strFilenameXlate = StrCat (DirScript (), "MyXlate.bin")
If FileSize (strFilenameXlate) == intSizeXLate
   BinaryRead (hdlBBXlate, strFilenameXlate) ; Load Xlate table.
Else
   GoSub PokeTable
   BinaryWrite (hdlBBXlate, strFilenameXlate) ; Write Xlate table.
EndIf

; Transforming: every byte that is not part of a word becomes a blank.
hdlBB = BinaryAlloc (intBBSize)
BinaryRead (hdlBB, strFilenameIn)
BinaryConvert (hdlBB, 0, 1, 0, 0) ; From ANSI to OEM, because the table is designed for ASCII chars.
BinaryXlate (hdlBB, hdlBBXlate, 0) ; Apply Xlate table.
hdlBBXlate = BinaryFree (hdlBBXlate)
BinaryConvert (hdlBB, 1, 0, 0, 0) ; From OEM to ANSI.

; Collapse runs of blanks into a single blank.
; The search string must be TWO blanks. Replacing one blank by one blank reports a non-zero
; replacement count on every pass and therefore never terminates.
; StrFill is used so that the two blanks cannot get lost by whitespace reformatting of this source.
strTwoBlanks = StrFill (" ", 2)
While BinaryReplace (hdlBB, strTwoBlanks, " ", @TRUE)
EndWhile

; Park the intermediate result in the output file and count the remaining blanks,
; because each blank grows by one byte when it is replaced by CRLF.
BinaryWrite (hdlBB, strFilenameOut)
intBlankCount = BinaryStrCnt (hdlBB, 0, BinaryEodGet (hdlBB) - 1, " ")
hdlBB = BinaryFree (hdlBB)

; Buffer size: data + one extra byte per blank + leading LF and trailing CR (2) + final LF (1).
hdlBB = BinaryAlloc (intBBSize + intBlankCount + 2 + 1)
; NOTE(review): the collapsed file may be shorter than intBBSize; this relies on BinaryReadEx
; accepting a byte count beyond the end of the file - confirm.
BinaryReadEx (hdlBB, 1, strFilenameOut, 0, intBBSize)
; Frame the data with LF ... CR, so that empty lines always show up as the pair LF+CR.
BinaryPokeStr (hdlBB, 0, @LF)
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @CR)
BinaryReplace (hdlBB, " ", @CRLF, @TRUE)

; Delete all blank lines.
sSearch = StrCat (@LF, @CR)
While BinaryReplace (hdlBB, sSearch, "", @TRUE)
EndWhile

; Terminate the last line, then write everything except the leading LF at offset 0.
BinaryPokeStr (hdlBB, BinaryEodGet (hdlBB), @LF)
BinaryWriteEx (hdlBB, 0, strFilenameOut, 0, -1) ; Set Filesize to zero.
BinaryWriteEx (hdlBB, 1, strFilenameOut, 0, BinaryEodGet (hdlBB) - 1)
hdlBB = BinaryFree (hdlBB)
Return FileSize (strFilenameOut)
;..........................................................................................................................................

:PokeTable
; Fills hdlBBXlate with the 256 byte translation table.
; A byte that belongs to a word maps to itself, every other byte maps to 032 (blank).
; Note: The percent sign is written as [pct] in the comments below.
;--- Translation Table ASCII -----------------------------------------------------------------------
;Codes   0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
sRow0 ="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x0- ; "                "
sRow1 ="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0x1- ; "                "
sRow2 ="032 032 032 035 036 037 038 032 032 032 032 032 032 045 032 032" ; 0x2- ; "   #$ &      -  " ; [pct]=037
sRow3 ="048 049 050 051 052 053 054 055 056 057 032 032 032 032 032 032" ; 0x3- ; "0123456789      "
sRow4 ="064 065 066 067 068 069 070 071 072 073 074 075 076 077 078 079" ; 0x4- ; "@ABCDEFGHIJKLMNO"
sRow5 ="080 081 082 083 084 085 086 087 088 089 090 032 032 032 032 095" ; 0x5- ; "PQRSTUVWXYZ    _"
sRow6 ="032 097 098 099 100 101 102 103 104 105 106 107 108 109 110 111" ; 0x6- ; " abcdefghijklmno"
sRow7 ="112 113 114 115 116 117 118 119 120 121 122 032 032 032 032 032" ; 0x7- ; "pqrstuvwxyz     "
sRow8 ="128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143" ; 0x8- ; "ÇüéâäàåçêëèïîìÄÅ"
sRow9 ="144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159" ; 0x9- ; "ÉæÆôöòûùÿÖÜø£Ø×"
sRow10="160 161 162 163 164 165 166 167 032 032 032 032 032 032 032 032" ; 0xA- ; "áíóúñѪº        "
sRow11="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xB- ; "                "
sRow12="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xC- ; "                "
sRow13="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xD- ; "                "
sRow14="224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239" ; 0xE- ; "ÓßÔÒõÕµþÞÚÛÙýݯ´"
sRow15="032 032 032 032 032 032 032 032 032 032 032 032 032 032 032 032" ; 0xF- ; "                "
;--- Translation Table ASCII -----------------------------------------------------------------------
; Poke each table item to buffer offset row * 16 + column.
; The row variable name is built by substitution from the row number.
For intRow = 0 To 15
   For intCol = 0 To 15
      BinaryPoke (hdlBBXlate, intRow * 16 + intCol, ItemExtract (intCol + 1, sRow%intRow%, " "))
   Next
   Drop (sRow%intRow%)
Next
Return ; from GoSub PokeTable
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

strFilenameIn = IntControl (1004, 0, 0, 0, 0) ; We use this script for test input.
strFilenameTmp = FileCreateTemp ("TMP")       ; Temporary file for test output.

; Build a file with single word on each line.
; Words are listed in order of original occurrence.
intResult = udfFileOneWordPerLine (strFilenameIn, strFilenameTmp)

RunWait ("notepad.exe", strFilenameTmp) ; Wait for closing notepad.
FileDelete (strFilenameTmp)             ; Cleaning.

Exit
;------------------------------------------------------------------------------------------------------------------------------------------