; udfGetBomType
; str udfGetBomType (str)
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfGetBomType (strFilename)
; Returns a token string naming the byte order mark (BOM) found at the start of the file,
; or an empty string if the file is not available or no known BOM is detected.
If FileExist (strFilename) != 1 Then Return ""
strBT = ""
; Buffer layout: offsets 0..3 receive the leading bytes of the file,
; offsets 4..17 hold the reference signatures poked in below.
hdlBB = BinaryAlloc (18)
; Keep the number of bytes the file really delivered. Without it, a file shorter than a
; signature would be compared using buffer bytes that never came from the file
; (e.g. a file holding only FF FE would be taken for FF FE 00 00, assuming the unread bytes are zero).
intRead = BinaryReadEx (hdlBB, 0, strFilename, 0, 4)
; Offsets: 4=FF 5=FE 6=00 7=00 8=FE 9=FF 10=EF 11=BB 12=BF 13=2B 14=2F 15=76 16=38 17=39.
BinaryPokeHex (hdlBB, 4, "FFFE 0000 FEFF EFBBBF 2B 2F 76 38 39")
; The longer signatures are tested first, because FF FE is a prefix of FF FE 00 00.
Switch @TRUE
Case (intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 4, 4)  ; FF FE 00 00.
   strBT = "UTF-32LE"
   Break
Case (intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 6, 4)  ; 00 00 FE FF.
   strBT = "UTF-32BE"
   Break
Case (intRead >= 2) && BinaryCompare (hdlBB, 0, hdlBB, 4, 2)  ; FF FE.
   strBT = "UTF-16LE"
   Break
Case (intRead >= 2) && BinaryCompare (hdlBB, 0, hdlBB, 8, 2)  ; FE FF.
   strBT = "UTF-16BE"
   Break
Case (intRead >= 3) && BinaryCompare (hdlBB, 0, hdlBB, 10, 3) ; EF BB BF.
   strBT = "UTF-8"
   Break
Case (intRead >= 4) && BinaryCompare (hdlBB, 0, hdlBB, 13, 3) ; 2B 2F 76.
   ; The fourth file byte must be one of 2B, 2F, 38, 39.
   ; The Case lines deliberately fall through to the single assignment.
   Switch @TRUE
   Case BinaryCompare (hdlBB, 3, hdlBB, 13, 1) ; 2B.
   Case BinaryCompare (hdlBB, 3, hdlBB, 14, 1) ; 2F.
   Case BinaryCompare (hdlBB, 3, hdlBB, 16, 1) ; 38.
   Case BinaryCompare (hdlBB, 3, hdlBB, 17, 1) ; 39.
      strBT = "UTF-7"
   EndSwitch
EndSwitch
hdlBB = BinaryFree (hdlBB)
Return strBT
;..........................................................................................................................................
; This UDF "udfGetBomType" compares the first bytes (up to four) of the given "text" file
; against well-known byte sequences, which determine the character encoding of the text in the file.
; A signature is only matched if the file delivered at least as many bytes as the signature is long.
;
; Parameter:
;    strFilename : Name of the file to inspect.
;
; Returns:
;    An empty string if FileExist does not report 1 for the file, or if no known byte sequence is detected
;    (which could mean that the file contains ANSI characters).
;    Otherwise one of the following token strings is returned.
;
; Token    : Bytes       : CP 1252 : Byte order
; ---------:-------------:---------:---------------
; UTF-8    : EF BB BF    : ï»¿     :
; UTF-16LE : FF FE       : ÿþ      : little-endian
; UTF-16BE : FE FF       : þÿ      : big-endian
; UTF-32LE : FF FE 00 00 : ÿþ..    : little-endian
; UTF-32BE : 00 00 FE FF : ..þÿ    : big-endian
; ---------:-------------:---------:---------------
; UTF-7    : 2B 2F 76 2B : +/v+    :
; UTF-7    : 2B 2F 76 2F : +/v/    :
; UTF-7    : 2B 2F 76 38 : +/v8    :
; UTF-7    : 2B 2F 76 39 : +/v9    :
; ---------:-------------:---------:---------------
;
; Note: A UTF-16LE file whose first character after the BOM is U+0000 starts with FF FE 00 00
; and therefore cannot be told apart from UTF-32LE; such a file is reported as "UTF-32LE".
;
; (c)Detlev Dalitz.20120704.20120722.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfIsTextUnicode (strFilename)
; Returns @TRUE if the Windows API function IsTextUnicode reports a non-zero result for the file content, otherwise @FALSE.
; Same availability guard as in udfGetBomType: a file that is missing (or not reported as 1 by FileExist) is not read at all.
If FileExist (strFilename) != 1 Then Return @FALSE
dnsBBsize = FileSize (strFilename, 1) ; Possible huge file size, so we use decimal number string.
If dnsBBsize == "0" Then Return @FALSE ; Nothing to test in an empty file.
hdlBB = BinaryAlloc (dnsBBsize)
intBytesRead = BinaryRead (hdlBB, strFilename)
; Pass the number of bytes actually loaded into the buffer as the size argument.
; The double negation folds any non-zero API result into a plain boolean.
blnTestPassed = !!DllCall ("Advapi32.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
hdlBB = BinaryFree (hdlBB)
Return blnTestPassed
;..........................................................................................................................................
; This Function "udfIsTextUnicode" returns a boolean value which indicates if a given file is likely to contain a form of Unicode text or not.
;
; Parameter:
;    strFilename : Name of the file to test. The whole file is loaded into a binary buffer.
;
; Returns:
;    @FALSE if FileExist does not report 1 for the file, if the file is empty, or if IsTextUnicode returns zero.
;    @TRUE  if IsTextUnicode returns a non-zero value.
;
; The third argument of IsTextUnicode is passed as a null pointer (lpnull).
;
; Alternative DllCall:
; blnTestPassed = DllCall ("Unicows.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
;
; (c)Detlev Dalitz.20030701.20100114.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test driver.
; Runs both UDFs against the sample files located in the script's own folder.
; The trailing comment on each call states the expected result.

DirChange (DirScript ())

; udfGetBomType: files too short to carry a BOM.
strBomEmpty   = udfGetBomType ("20120704.udfGetBomType.Test.Empty.txt")      ; "".
strBomOneChar = udfGetBomType ("20120704.udfGetBomType.Test.OneChar.txt")    ; "".

; udfGetBomType: ANSI and the UTF-8/16/32 signatures.
strBomAnsi    = udfGetBomType ("20120704.udfGetBomType.Test.ANSI.txt")        ; "".
strBomUtf16LE = udfGetBomType ("20120704.udfGetBomType.Test.UTF-16LE.txt")    ; "UTF-16LE".
strBomUtf16BE = udfGetBomType ("20120704.udfGetBomType.Test.UTF-16BE.txt")    ; "UTF-16BE".
strBomUtf8    = udfGetBomType ("20120704.udfGetBomType.Test.UTF-8.txt")       ; "UTF-8".
strBomUtf32LE = udfGetBomType ("20120704.udfGetBomType.Test.UTF-32LE.txt")    ; "UTF-32LE".
strBomUtf32BE = udfGetBomType ("20120704.udfGetBomType.Test.UTF-32BE.txt")    ; "UTF-32BE".

; udfGetBomType: the UTF-7 sample files.
strBomUtf7v1 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.1.txt")    ; "UTF-7".
strBomUtf7v2 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.2.txt")    ; "UTF-7".
strBomUtf7v3 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.3.txt")    ; "UTF-7".
strBomUtf7v4 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.4.txt")    ; "UTF-7".
strBomUtf7v5 = udfGetBomType ("20120704.udfGetBomType.Test.UTF-7.5.txt")    ; "UTF-7".

; udfIsTextUnicode: only the UTF-16LE sample is expected to pass.
blnIsUniAnsi    = udfIsTextUnicode ("20120704.udfGetBomType.Test.ANSI.txt")     ; 0
blnIsUniUtf16LE = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-16LE.txt") ; 1
blnIsUniUtf16BE = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-16BE.txt") ; 0
blnIsUniUtf8    = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-8.txt")    ; 0
blnIsUniUtf32LE = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-32LE.txt") ; 0
blnIsUniUtf32BE = udfIsTextUnicode ("20120704.udfGetBomType.Test.UTF-32BE.txt") ; 0

Exit