; How to split a big text file into small files?
;==========================================================================================================================================
; How to split a big text file into smaller files of n lines size each?
;
; Detlev Dalitz.20040331.20100205.
;==========================================================================================================================================


;------------------------------------------------------------------------------------------------------------------------------------------
;   Wednesday, March 31, 2004 12:15 AM
;   Hi,
;   can you help me with following problem?
;   I have "big" file with more than 2800000 lines. I need split this file to smaller files with 64000 lines.
;
;   Thanks Patrik
;   patrikm patrikm@moravia-it.com
;------------------------------------------------------------------------------------------------------------------------------------------
;   >How often do you have to do this task?
;   Many times - I have database export and I need work with these data in Excel
;
;   >Is the file a text file, line delimited by CRLF sequence?
;   Lines are delimited with CRLF sequence
;
;   >What filesize overall?
;   About 400 MB
;------------------------------------------------------------------------------------------------------------------------------------------


; Purpose:
;   Split a big CRLF-delimited text file into part files of intSearchMax lines each.
;   Pass 1 scans the input in chunks and collects the byte offsets of the split points.
;   Pass 2 copies each byte range [previous split, split) into its own part file
;   named "<input>.part.<count>.<number>.txt".

; For the test case we use this script as test input file.
strFilename = IntControl (1004, 0, 0, 0, 0)

; Reality case is for example 2.800.000 lines, 400 MB.
;strFilename = "drive:\folder\bigfile.txt"            ; <== Change path to your needs.

intFilesize = FileSize (strFilename, 1)
Terminate (intFilesize == "0", "Error", strFilename : @LF : "Filesize is zero.")

;----------------------------
; Placeholders used in the message and filename masks; replaced by StrReplace later on.
@P1 = "{1}"
@P2 = "{2}"

strMsgTitle = "SplitBigFile"
strMsgText = "Searching split points ..."
BoxOpen (strMsgTitle, strMsgText)

strMsgTextMask = strMsgText : @LF : strFilename : @LF : intFilesize : "/" : @P1

; --- Pass 1 ----------------
; We walk through the big file,
; and count the occurrences of search literal,
; and calculate where split points are,
; and collect split offsets into an itemlist.

; What do we search? We search for CRLF sequences in the big text file.
strSearch = @CRLF
intSearchLen = StrLen (strSearch)

; Big text file to split into files of n lines each.
;intSearchMax = 64000                                 ; <== Change number to your needs.  <==
intSearchMax = 20 ; For the test case we create split files with a size of 20 lines each (= 20 CRLF's).

; Chunk size can be adjusted to smaller or bigger chunks, depending on file size and system resources.
; The chunk size is clamped to at least 1 byte, so that files smaller than 100 bytes
; do not lead to a zero chunk size.
intChunksize = Max (1, intFilesize / 100)             ; <== Change chunk size to your needs.  <==

; Each read overlaps the following chunk by (search length - 1) bytes,
; so a search literal that straddles a chunk boundary is still found.
intOverlap = intSearchLen - 1
intReadSize = intChunksize + intOverlap

strListSplit = ""
intOffsetFile = 0
intOffsetSplit = 0   ; Must be defined even if no split point is found at all (file shorter than intSearchMax lines).
intCountSearch = 0
intSkip = 0          ; Bytes at the start of the next chunk already consumed by a match that crossed the boundary.

hdlBB = BinaryAlloc (intReadSize)

While intOffsetFile < intFilesize
   ; The last read may return fewer bytes than requested; the rest of the buffer
   ; then still holds data of the previous read and must not be evaluated.
   intBytesRead = BinaryReadEx (hdlBB, 0, strFilename, intOffsetFile, intReadSize)
   intOffsetBB = intSkip
   intSkip = 0

   While intOffsetBB < intBytesRead
      intPos = BinaryIndexEx (hdlBB, intOffsetBB, strSearch, @FWDSCAN, @TRUE)
      If intPos < 0 Then Break                                 ; Not found in this buffer.
      If intPos >= intChunksize Then Break                     ; Starts in the overlap area, will be counted with the next chunk.
      If (intPos + intSearchLen) > intBytesRead Then Break     ; Hit lies (partly) in stale data behind the bytes read.
      intOffsetBB = intPos + intSearchLen
      If intOffsetBB > intChunksize Then intSkip = intOffsetBB - intChunksize
      intCountSearch = intCountSearch + 1
      If !(intCountSearch mod intSearchMax)
         intOffsetSplit = intOffsetFile + intOffsetBB
         strListSplit = ItemInsert (intOffsetSplit, -1, strListSplit, @TAB)
         BoxText (StrReplace (strMsgTextMask, @P1, intOffsetSplit))
      EndIf
   EndWhile

   intOffsetFile = intOffsetFile + intChunksize
EndWhile

; The remainder behind the last split point (or the whole file, if there is no split point) becomes the last part.
If intOffsetSplit < intFilesize
   strListSplit = ItemInsert (intFilesize, -1, strListSplit, @TAB)
   BoxText (StrReplace (strMsgTextMask, @P1, intFilesize))
EndIf

hdlBB = BinaryFree (hdlBB)


; --- Pass 2 ----------------
; Create the split files.

intCount = ItemCount (strListSplit, @TAB)
intCountLen = StrLen (intCount)

strMsgText = "Writing split files ..."
BoxText (strMsgText)
strMsgTextMask = strMsgText : @LF : intCount : "/" : @P1 : @LF : @P2
strFileOutMask = strFilename : ".part." : intCount : "." : @P1 : ".txt"

; Find the size of the biggest part, so that the buffer has to be allocated only once.
intSplitBegin = 0
intBBsizeMax = 0
For intNum = 1 To intCount
   intSplitEnd = ItemExtract (intNum, strListSplit, @TAB)
   intBBsizeMax = Max (intBBsizeMax, intSplitEnd - intSplitBegin)
   intSplitBegin = intSplitEnd
Next

hdlBB = BinaryAlloc (intBBsizeMax)

intSplitBegin = 0
intSplitEnd = 0

For intNum = 1 To intCount
   ; strNum = intNum ; Leave the counter number as is ... or for better filename sorting ...
   strNum = StrFixLeft (intNum, "0", intCountLen) ; ... make the counter number fixed length.
   intSplitEnd = ItemExtract (intNum, strListSplit, @TAB)
   intBBSize = intSplitEnd - intSplitBegin
   strFilenameOut = StrReplace (strFileOutMask, @P1, strNum)
   ; Writing at file offset 0 does not shorten an existing longer file,
   ; therefore remove a part file left over from a previous run.
   If FileExist (strFilenameOut) Then FileDelete (strFilenameOut)
   intBytesRead = BinaryReadEx (hdlBB, 0, strFilename, intSplitBegin, intBBSize)
   intBytesWritten = BinaryWriteEx (hdlBB, 0, strFilenameOut, 0, intBBSize)
   intSplitBegin = intSplitEnd
   BoxText (StrReplace (StrReplace (strMsgTextMask, @P1, strNum), @P2, strFilenameOut))
Next

hdlBB = BinaryFree (hdlBB)

BoxShut ()

; Look into the folder.
Run ("explorer.exe", "/select, " : strFileName)
Exit
;==========================================================================================================================================