;========================================================================================================================================== ; ; How to get oldest and newest date from a column of a CSV text file? ; ;------------------------------------------------------------------------------------------------------------------------------------------ ; Topic: Sort Text Data Data YYYYMMDD to obtain Min & Max ; Conf: WinBatch ; From: rickmds info@mandecsys.com ; Date: Friday, January 20, 2012 03:58 PM ;------------------------------------------------------------------------------------------------------------------------------------------ ; ; "I have a space delimited print file of roughly 30,000 rows (File A) which contains a date in the format yyyymmdd." ; "I would like to obtain the Min & Max date from this date field so that I can use it ... ; ... to name the new file (FileB mmdd_mmdd) that will be created with the data contained in File A." ; ;------------------------------------------------------------------------------------------------------------------------------------------ ; (c)Detlev Dalitz.20120121. ;========================================================================================================================================== ; DirChange (DirScript ()) strFileThis = IntControl (1004, 0, 0, 0, 0) strFileData = ItemReplace ("Data.txt", -1, strFileThis, ".") blnResult = FileDelete (strFileData) ; Delete existing test data file. ;------------------------------------------------------------------------------------------------------------------------------------------ ; Define test parameters. intRowsMax = 10 ;intRowsMax = 100 ;intRowsMax = 1000 ;intRowsMax = 10000 ;intRowsMax = 30000 ;intRowsMax = 100000 strDataRowMask = "Data Data Data {1} Data Data" ; "{1}" will be replaced by a random date string. ;------------------------------------------------------------------------------------------------------------------------------------------ ; Step 1. ; Fill test data file. intTicksStart = GetTickCount () ; For later examination we fetch out the min and max values, while creating the test data. strYmdMin1 = "99999999" strYmdMax1 = "00000000" hdlFW = FileOpen (strFileData, "WRITE") For intI = 1 To intRowsMax strYmd = StrFixLeft (2010 + Random (2), "0", 4) : StrFixLeft (1 + Random (11), "0", 2) : StrFixLeft (1 + Random (27), "0", 2) If StrCmp (strYmd, strYmdMin1) < 0 Then strYmdMin1 = strYmd If StrCmp (strYmd, strYmdMax1) > 0 Then strYmdMax1 = strYmd strDataRow = StrReplace (strDataRowMask, "{1}", strYmd) FileWrite (hdlFW, strDataRow) Next hdlFW = FileClose (hdlFW) intTicksStop = GetTickCount () strMsgTitle = "Step 1" strMsgText = "Test data file created." strMsgText = strMsgText : @LF : @LF : "Duration[s]: " : Int (10 * (intTicksStop - intTicksStart) / 1000.0) / 10.0 strMsgText = strMsgText : @LF : @LF : "Filesize [KB]: " : Int (100 * FileSize (strFileData, 0) / 1024.0) / 100.0 strMsgText = strMsgText : @LF : @LF : "Rows: " : intRowsMax strMsgText = strMsgText : @LF : @LF : "1) Ymd Min: " : strYmdMin1 strMsgText = strMsgText : @LF : @LF : "1) Ymd Max: " : strYmdMax1 Pause (strMsgTitle, strMsgText) ;------------------------------------------------------------------------------------------------------------------------------------------ ; Step 2. ; Use WinBatch function FileRead to read data file sequentially. intTicksStart = GetTickCount () strYmdMin2 = "99999999" strYmdMax2 = "00000000" hdlFR = FileOpen (strFileData, "READ") While @TRUE strLine = FileRead (hdlFR) If strLine == "*EOF*" Then Break If strLine == "" Then Continue ; Use this ... ; >>> strYmd = ItemExtract (4, strLine, " ") ; Note: In this case, because of the space delimiter, we can use the function ItemExtract, ; <<< ; ... otherwise use this ... ; >>> ; strLine = StrReplace (strLine, " ", "|") ; strYmd = ItemExtractCsv (4, strLine, 0, "|") ; Note: For a CSV conform line use the function ItemExtractCsv. ; <<< If StrCmp (strYmd, strYmdMin2) < 0 Then strYmdMin2 = strYmd If StrCmp (strYmd, strYmdMax2) > 0 Then strYmdMax2 = strYmd EndWhile hdlFR = FileClose (hdlFR) ; Create new filename. strFileNew1 = strYmdMin2 : "_" : strYmdMax2 : ".txt" ; yyyymmdd_yyyymmdd ; old_new strFileNew2 = StrSub (strYmdMin2, 5, -1) : "_" : StrSub (strYmdMax2, 5, -1) : ".txt" ; mmdd_mmdd ; old_new intTicksStop = GetTickCount () ; Display test result. strMsgTitle = "Step 2" strMsgText = "Duration[s]: " : Int (10 * (intTicksStop - intTicksStart) / 1000.0) / 10.0 strMsgText = strMsgText : @LF : @LF : "Min and Max values OK: " : ItemExtract (1 + (strYmdMin1 == strYmdMin2 && strYmdMax1 == strYmdMax2), "no,yes", ",") strMsgText = strMsgText : @LF : @LF : "1) Ymd Min: " : strYmdMin1 : @LF : "1) Ymd Max: " : strYmdMax1 strMsgText = strMsgText : @LF : @LF : "2) Ymd Min: " : strYmdMin2 : @LF : "2) Ymd Max: " : strYmdMax2 strMsgText = strMsgText : @LF : @LF : "Filename new: " : strFileNew1 : @LF : "Filename new: " : strFileNew2 Pause (strMsgTitle, strMsgText) ;------------------------------------------------------------------------------------------------------------------------------------------ ; Step 3. ; Use WinBatch function ArrayFileGetCsv. ; Note: "Delimiter can be any single printable character, numbered from 32 to 126 (decimal), except a space(32) or double quotation mark(34)." ; So we have to change the delimiter from space to pipe character, for example. intTicksStart = GetTickCount () strDelimOld = " " strDelimNew = "|" intBytesWritten = FilePut (strFileData, StrReplace (FileGet (strFileData), strDelimOld, strDelimNew)) ; Get data into array and sort on the date column. arrData = ArrayFileGetCSV (strFileData, 0, strDelimNew) ArraySort (arrData, @ASCENDING, 3) ; Get min and max date values. strYmdMin3 = arrData[0, 3] strYmdMax3 = arrData[ArrInfo (arrData, 1) - 1, 3] ; Create new filename. strFileNew1 = strYmdMin3 : "_" : strYmdMax3 : ".txt" ; yyyymmdd_yyyymmdd ; old_new strFileNew2 = StrSub (strYmdMin3, 5, -1) : "_" : StrSub (strYmdMax3, 5, -1) : ".txt" ; mmdd_mmdd ; old_new intTicksStop = GetTickCount () ; Display test result. strMsgTitle = "Step 3" strMsgText = "Duration[s]: " : Int (10 * (intTicksStop - intTicksStart) / 1000.0) / 10.0 strMsgText = strMsgText : @LF : @LF : "Min and Max values OK: " : ItemExtract (1 + (strYmdMin1 == strYmdMin3 && strYmdMax1 == strYmdMax3), "no,yes", ",") strMsgText = strMsgText : @LF : @LF : "1) Ymd Min: " : strYmdMin1 : @LF : "1) Ymd Max: " : strYmdMax1 strMsgText = strMsgText : @LF : @LF : "3) Ymd Min: " : strYmdMin3 : @LF : "3) Ymd Max: " : strYmdMax3 strMsgText = strMsgText : @LF : @LF : "Filename new: " : strFileNew1 : @LF : "Filename new: " : strFileNew2 Pause (strMsgTitle, strMsgText) :CANCEL Exit