udfIsTextUnicode
bln udfIsTextUnicode (str)
;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfIsTextUnicode (strFilename)
dnsBBsize = FileSize (strFilename, 1) ; Possible huge file size, so we use decimal number string.
If dnsBBsize == "0" Then Return @FALSE ; Empty file: nothing to examine.
hdlBB = BinaryAlloc (dnsBBsize)
intBytesRead = BinaryRead (hdlBB, strFilename)
; Hand the API the number of bytes that were really loaded into the buffer,
; not the size that was reported before the read took place.
blnTestPassed = @FALSE
If intBytesRead > 0
   ; "!!" normalizes any nonzero API result to 1. "lpnull" passes lpi = NULL.
   blnTestPassed = !!DllCall ("Advapi32.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
EndIf
hdlBB = BinaryFree (hdlBB)
Return blnTestPassed
;..........................................................................................................................................
; This Function "udfIsTextUnicode" returns a boolean value which indicates if a given file is likely to contain a form of Unicode text or not.
;
; Parameter:
;    strFilename ... Name of the file to be examined. The whole file is read into a binary buffer.
;
; Returns:
;    1 ... if the API function "IsTextUnicode" returns a nonzero value for the file content.
;    0 ... if the API function returns zero, if the file size is zero, or if no bytes were read.
;
; Alternative DllCall:
; blnTestPassed = !!DllCall ("Unicows.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
;
; (c)Detlev Dalitz.20030701.20100114.
;..........................................................................................................................................
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------

; Test.

; Build the names of the two output files below the "Temp\" subfolder.
strTempDir = ShortCutDir ("Local Settings", 0, 1) : "Temp\"
strAnsiFile = strTempDir : "out.ansi.txt"
strUnicodeFile = strTempDir : "out.uc.txt"

; Redirect a directory listing into each file. The second run starts cmd.exe with the additional switch "/u".
strParamsAnsi = "/c dir /-p *.* >""" : strAnsiFile : """"
strParamsUnicode = "/u /c dir /-p *.* >""" : strUnicodeFile : """"
intRunAnsi = RunHideWait ("cmd.exe", strParamsAnsi)
intRunUnicode = RunHideWait ("cmd.exe", strParamsUnicode)

; Keep only the first 70 characters of each file content for display.
strAnsiPreview = StrSub (FileGet (strAnsiFile), 1, 70)
strUnicodePreview = StrSub (FileGet (strUnicodeFile), 1, 70)

; Run the function under test on both files.
blnAnsiIsUnicode = udfIsTextUnicode (strAnsiFile)       ; Expected: 0 = @FALSE.
blnUnicodeIsUnicode = udfIsTextUnicode (strUnicodeFile) ; Expected: 1 = @TRUE.

; Assemble the report: one paragraph per file, separated by an empty line.
strQuote = """"
strReportAnsi = "Ansi:" : @LF : strQuote : strAnsiPreview : strQuote : @LF : "IsUnicode: " : blnAnsiIsUnicode
strReportUnicode = "Unicode:" : @LF : strQuote : strUnicodePreview : strQuote : @LF : "IsUnicode: " : blnUnicodeIsUnicode
strReport = strReportAnsi : @LF : @LF : strReportUnicode
Message ("Test: Ansi - Unicode", strReport)

Exit

;------------------------------------------------------------------------------------------------------------------------------------------
;   IsTextUnicode
;   The IsTextUnicode function determines whether a buffer is likely to contain a form of Unicode text.
;   The function uses various statistical and deterministic methods to make its determination,
;   under the control of flags passed via lpi.
;   When the function returns, the results of such tests are reported via lpi.
;
;   BOOL IsTextUnicode(
;     CONST VOID* lpBuffer, // input buffer to be examined
;     int cb,                // size of input buffer
;     LPINT lpi               // options
;   );
;
;   Parameters
;   lpBuffer
;   [in] Pointer to the input buffer to be examined.
;
;   cb
;   [in] Specifies the size, in bytes, of the input buffer pointed to by lpBuffer.
;
;   lpi
;   [in/out] On input, specifies the tests to be applied to the input buffer text.
;   On output, receives the results of the specified tests:
;   1 if the contents of the buffer pass a test, zero for failure.
;   Only flags that are set upon input to the function are significant upon output.
;
;   If lpi is NULL, the function uses all available tests to determine
;   whether the data in the buffer is likely to be Unicode text.
;
;   This parameter can be one or more of the following values. Value Meaning:
;   IS_TEXT_UNICODE_ASCII16             The text is Unicode, and contains only zero-extended ASCII values/characters.
;   IS_TEXT_UNICODE_REVERSE_ASCII16     Same as the preceding, except that the Unicode text is byte-reversed.
;
;   IS_TEXT_UNICODE_STATISTICS          The text is probably Unicode, with the determination made by applying statistical analysis. Absolute certainty is not guaranteed. See the following Remarks section.
;   IS_TEXT_UNICODE_REVERSE_STATISTICS  Same as the preceding, except that the probably-Unicode text is byte-reversed.
;
;   IS_TEXT_UNICODE_CONTROLS            The text contains Unicode representations of one or more of these nonprinting characters: RETURN, LINEFEED, SPACE, CJK_SPACE, TAB.
;   IS_TEXT_UNICODE_REVERSE_CONTROLS    Same as the preceding, except that the Unicode characters are byte-reversed.
;
;   IS_TEXT_UNICODE_BUFFER_TOO_SMALL    There are too few characters in the buffer for meaningful analysis (fewer than two bytes).
;
;   IS_TEXT_UNICODE_SIGNATURE           The text contains the Unicode byte-order mark (BOM) 0xFEFF as its first character.
;   IS_TEXT_UNICODE_REVERSE_SIGNATURE   The text contains the Unicode byte-reversed byte-order mark (Reverse BOM) 0xFFFE as its first character.
;
;   IS_TEXT_UNICODE_ILLEGAL_CHARS       The text contains one of these Unicode-illegal characters: embedded Reverse BOM, UNICODE_NUL, CRLF (packed into one WORD), or 0xFFFF.
;   IS_TEXT_UNICODE_ODD_LENGTH          The number of characters in the string is odd. A string of odd length cannot (by definition) be Unicode text.
;   IS_TEXT_UNICODE_NULL_BYTES          The text contains null bytes, which indicate non-ASCII text.
;
;   IS_TEXT_UNICODE_UNICODE_MASK        This flag constant is a combination of IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
;   IS_TEXT_UNICODE_REVERSE_MASK        This flag constant is a combination of IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
;   IS_TEXT_UNICODE_NOT_UNICODE_MASK    This flag constant is a combination of IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
;   IS_TEXT_UNICODE_NOT_ASCII_MASK      This flag constant is a combination of IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
;
;   IS_TEXT_UNICODE_ASCII16               1      ; 0x0001
;   IS_TEXT_UNICODE_REVERSE_ASCII16       16     ; 0x0010
;
;   IS_TEXT_UNICODE_STATISTICS            2      ; 0x0002
;   IS_TEXT_UNICODE_REVERSE_STATISTICS    32     ; 0x0020
;
;   IS_TEXT_UNICODE_CONTROLS              4      ; 0x0004
;   IS_TEXT_UNICODE_REVERSE_CONTROLS      64     ; 0x0040
;
;   IS_TEXT_UNICODE_SIGNATURE             8      ; 0x0008
;   IS_TEXT_UNICODE_REVERSE_SIGNATURE     128    ; 0x0080
;
;   IS_TEXT_UNICODE_ILLEGAL_CHARS         256    ; 0x0100
;   IS_TEXT_UNICODE_ODD_LENGTH            512    ; 0x0200
;   IS_TEXT_UNICODE_DBCS_LEADBYTE         1024   ; 0x0400
;   IS_TEXT_UNICODE_NULL_BYTES            4096   ; 0x1000
;
;   IS_TEXT_UNICODE_UNICODE_MASK          15     ; 0x000F
;   IS_TEXT_UNICODE_REVERSE_MASK          240    ; 0x00F0
;   IS_TEXT_UNICODE_NOT_UNICODE_MASK      3840   ; 0x0F00
;   IS_TEXT_UNICODE_NOT_ASCII_MASK        61440  ; 0xF000
;
;   Return Values
;   The function returns a nonzero value if the data in the buffer passes the specified tests.
;   The function returns zero if the data in the buffer does not pass the specified tests.
;
;   Remarks
;   As noted in the preceding table of flag constants,
;   the IS_TEXT_UNICODE_STATISTICS and IS_TEXT_UNICODE_REVERSE_STATISTICS tests use statistical analysis.
;   These tests are not foolproof.
;   The statistical tests assume certain amounts of variation between low and high bytes in a string,
;   and some ASCII strings can slip through.
;   For example, if lpBuffer points to the ASCII string 65, 10, 13, 29 (A\n\r^Z),  ; 0x41 ; 0x0A ; 0x0D ; 0x1D
;   the string passes the IS_TEXT_UNICODE_STATISTICS test, though failure would be preferable.
;
;   Requirements
;     Windows NT/2000/XP: Included in Windows NT 3.5 and later.
;     Windows 95/98/Me: Unsupported.
;..........................................................................................................................................