;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfGetBomType (strFilename)
; Returns a token naming the byte order mark (BOM) found at the start of the file,
; or an empty string if the file is not accessible or carries no known BOM.
If FileExist (strFilename) != 1 Then Return ""

strBT = ""

; Buffer layout:
;   offset  0.. 3 : first (up to) four bytes of the file
;   offset  4.. 7 : FF FE 00 00  (offset 4, length 2 doubles as FF FE; offset 6, length 4 is 00 00 FE FF)
;   offset  8.. 9 : FE FF
;   offset 10..12 : EF BB BF
;   offset 13..15 : 2B 2F 76
;   offset 16..17 : 38 39
hdlBB = BinaryAlloc (18)

; Keep the number of bytes actually read. The buffer is zero-filled, so without this
; a two byte file "FF FE" would be compared including its padding zeros and would be
; misreported as UTF-32LE instead of UTF-16LE.
intRead = BinaryReadEx (hdlBB, 0, strFilename, 0, 4)
BinaryPokeHex (hdlBB, 4, "FFFE 0000 FEFF EFBBBF 2B 2F 76 38 39")

; Longer signatures are tested first, because FF FE is a prefix of FF FE 00 00.
; Each signature is only accepted if the file really supplied that many bytes.
Switch @TRUE
Case ((intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 4, 4)) ; FF FE 00 00.
   strBT = "UTF-32LE"
   Break
Case ((intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 6, 4)) ; 00 00 FE FF.
   strBT = "UTF-32BE"
   Break
Case ((intRead >= 2) && BinaryCompare (hdlBB, 0, hdlBB, 4, 2)) ; FF FE.
   strBT = "UTF-16LE"
   Break
Case ((intRead >= 2) && BinaryCompare (hdlBB, 0, hdlBB, 8, 2)) ; FE FF.
   strBT = "UTF-16BE"
   Break
Case ((intRead >= 3) && BinaryCompare (hdlBB, 0, hdlBB, 10, 3)) ; EF BB BF.
   strBT = "UTF-8"
   Break
Case ((intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 13, 3)) ; 2B 2F 76.
   ; The fourth byte decides: any of 2B, 2F, 38, 39 completes a UTF-7 signature.
   ; The Case labels below deliberately fall through to the single assignment.
   Switch @TRUE
   Case BinaryCompare (hdlBB, 3, hdlBB, 13, 1) ; 2B.
   Case BinaryCompare (hdlBB, 3, hdlBB, 14, 1) ; 2F.
   Case BinaryCompare (hdlBB, 3, hdlBB, 16, 1) ; 38.
   Case BinaryCompare (hdlBB, 3, hdlBB, 17, 1) ; 39.
      strBT = "UTF-7"
   EndSwitch
EndSwitch

hdlBB = BinaryFree (hdlBB)
Return strBT
;..........................................................................................................................................
; This UDF "udfGetBomType" compares the first four bytes of the given "text" file
; against well known byte sequences, which determine the character encoding of the text in the file.
;
; If no known byte sequence is detected, or the file cannot be accessed, the UDF returns an empty string,
; which could mean that the file contains ANSI characters.
; Otherwise one of the following token strings is returned.
;
; A signature is only recognized if the file is at least as long as the signature itself,
; e.g. a file consisting of just the two bytes FF FE is reported as UTF-16LE, not as UTF-32LE.
;
; Token    : Bytes       : CP 1252 : Byte order
; ---------:-------------:---------:---------------
; UTF-8    : EF BB BF    : ï»¿     :
; UTF-16LE : FF FE       : ÿþ      : little-endian
; UTF-16BE : FE FF       : þÿ      : big-endian
; UTF-32LE : FF FE 00 00 : ÿþ..    : little-endian
; UTF-32BE : 00 00 FE FF : ..þÿ    : big-endian
; ---------:-------------:---------:---------------
; UTF-7    : 2B 2F 76 2B : +/v+    :
; UTF-7    : 2B 2F 76 2F : +/v/    :
; UTF-7    : 2B 2F 76 38 : +/v8    :
; UTF-7    : 2B 2F 76 39 : +/v9    :
; ---------:-------------:---------:---------------
;
; (c)Detlev Dalitz.20120704.20120722.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfIsTextUnicode (strFilename)
; Returns @TRUE if the Windows API function IsTextUnicode rates the file content as Unicode text, otherwise @FALSE.

; Same accessibility guard as in udfGetBomType: a missing or locked file yields @FALSE instead of a runtime error.
If FileExist (strFilename) != 1 Then Return @FALSE

dnsBBsize = FileSize (strFilename, 1) ; Possible huge file size, so we use decimal number string.
If dnsBBsize == "0" Then Return @FALSE

hdlBB = BinaryAlloc (dnsBBsize)
intBytesRead = BinaryRead (hdlBB, strFilename)

; Pass the number of bytes actually loaded into the buffer as the size argument.
; The third argument is NULL, so IsTextUnicode applies all of its tests.
; "!!" normalizes the non-zero API result to @TRUE (1).
blnTestPassed = !!DllCall ("Advapi32.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)

hdlBB = BinaryFree (hdlBB)
Return blnTestPassed
;..........................................................................................................................................
; This function "udfIsTextUnicode" returns a boolean value which indicates whether a given file
; is likely to contain a form of Unicode text or not.
; An empty, missing or inaccessible file returns @FALSE.
;
; Alternative DllCall:
; blnTestPassed = DllCall ("Unicows.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
;
; (c)Detlev Dalitz.20030701.20100114.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------

; Test.
; The test files are expected to sit next to this script.
DirChange (DirScript ())

; --- udfGetBomType: files without a BOM (expected result "") ---
strBomType00 = udfGetBomType ("20120704.udfGetBomType.Test.Empty.txt")    ; "".
strBomType01 = udfGetBomType ("20120704.udfGetBomType.Test.OneChar.txt")  ; "".
strBomType11 = udfGetBomType ("20120704.udfGetBomType.Test.ANSI.txt")     ; "".

; --- udfGetBomType: UTF-16 / UTF-8 / UTF-32 signatures ---
strBomType12 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-16LE.txt") ; "UTF-16LE".
strBomType13 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-16BE.txt") ; "UTF-16BE".
strBomType14 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-8.txt")    ; "UTF-8".
strBomType15 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-32LE.txt") ; "UTF-32LE".
strBomType16 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-32BE.txt") ; "UTF-32BE".

; --- udfGetBomType: UTF-7 signature variants ---
strBomType21 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.1.txt")  ; "UTF-7".
strBomType22 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.2.txt")  ; "UTF-7".
strBomType23 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.3.txt")  ; "UTF-7".
strBomType24 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.4.txt")  ; "UTF-7".
strBomType25 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.5.txt")  ; "UTF-7".

; --- udfIsTextUnicode: same sample files, expected boolean result noted per line ---
blnUnicode1 = udfIsTextUnicode ("20120704.udfGetBomType.Test.ANSI.txt")     ; 0
blnUnicode2 = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-16LE.txt") ; 1
blnUnicode3 = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-16BE.txt") ; 0
blnUnicode4 = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-8.txt")    ; 0
blnUnicode5 = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-32LE.txt") ; 0
blnUnicode6 = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-32BE.txt") ; 0

Exit