;------------------------------------------------------------------------------------------------------------------------------------------
#DefineFunction udfIsTextUnicode (strFilename)
;..........................................................................................................................................
; This Function "udfIsTextUnicode" returns a boolean value which indicates if a given file is likely to contain a form of Unicode text or not.
; The decision is delegated to the Windows API function "IsTextUnicode" (Advapi32.dll).
;
; Parameter:
;    strFilename ... Name of the file to be examined.
;
; Returns:
;    1 ... IsTextUnicode returned a nonzero value for the file content.
;    0 ... The file is empty, no bytes were loaded into the buffer, or IsTextUnicode returned zero.
;
; Notes:
;    - The complete file content is loaded into a binary buffer before the API is called.
;    - The third API parameter (lpi) is passed as NULL, so the caller does not select individual tests.
;
; Alternative DllCall:
;    blnTestPassed = DllCall ("Unicows.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)
;
; (c)Detlev Dalitz.20030701.20100114.
;..........................................................................................................................................
dnsBBsize = FileSize (strFilename, 1) ; Possible huge file size, so we use decimal number string.
If dnsBBsize == "0" Then Return @FALSE ; Empty file, nothing to examine.

hdlBB = BinaryAlloc (dnsBBsize)
intBytesRead = BinaryRead (hdlBB, strFilename)

; Pass the number of bytes that BinaryRead actually placed into the buffer,
; not the size value that was queried before the read.
; The double negation folds any nonzero API result into 1.
blnTestPassed = @FALSE
If intBytesRead > 0 Then blnTestPassed = !!DllCall ("Advapi32.dll", long : "IsTextUnicode", lpbinary : hdlBB, long : intBytesRead, lpnull)

hdlBB = BinaryFree (hdlBB)
Return blnTestPassed
#EndFunction
;------------------------------------------------------------------------------------------------------------------------------------------


; Test.

; Both test files are written into the "Temp" folder below the user's "Local Settings" shell folder.
strFolderTemp = ShortCutDir ("Local Settings", 0, 1) : "Temp\"
strFileAnsi = strFolderTemp : "out.ansi.txt"
strFileUC = strFolderTemp : "out.uc.txt"

; Create the same directory listing twice; the second run uses the cmd.exe switch "/u".
intResult1 = RunHideWait ("cmd.exe", "/c dir /-p *.* >""" : strFileAnsi : """")
intResult2 = RunHideWait ("cmd.exe", "/u /c dir /-p *.* >""" : strFileUC : """")

; Keep the first 70 characters of each file as a sample for the message box.
strText1 = FileGet (strFileAnsi)
strText2 = FileGet (strFileUC)
strText1 = StrSub (strText1, 1, 70)
strText2 = StrSub (strText2, 1, 70)

blnResult1 = udfIsTextUnicode (strFileAnsi) ; 0 = @FALSE.
blnResult2 = udfIsTextUnicode (strFileUC)   ; 1 = @TRUE.

; Show both text samples together with the detection results, then end the script.
strMsgTitle = "Test: Ansi - Unicode"
strQuote = """"
strMsgText = "Ansi:" : @LF : strQuote : strText1 : strQuote : @LF : "IsUnicode: " : blnResult1
strMsgText = strMsgText : @LF : @LF
strMsgText = strMsgText : "Unicode:" : @LF : strQuote : strText2 : strQuote : @LF : "IsUnicode: " : blnResult2
Message (strMsgTitle, strMsgText)
Exit
;------------------------------------------------------------------------------------------------------------------------------------------
; IsTextUnicode
; The IsTextUnicode function determines whether a buffer is likely to contain a form of Unicode text.
; The function uses various statistical and deterministic methods to make its determination,
; under the control of flags passed via lpi.
; When the function returns, the results of such tests are reported via lpi.
;
; BOOL IsTextUnicode(
;   CONST VOID* pBuffer,   // input buffer to be examined
;   int cb,                // size of input buffer
;   LPINT lpi              // options
; );
;
; Parameters
; lpBuffer
; [in] Pointer to the input buffer to be examined.
;
; cb
; [in] Specifies the size, in bytes, of the input buffer pointed to by lpBuffer.
;
; lpi
; [in/out] On input, specifies the tests to be applied to the input buffer text.
; On output, receives the results of the specified tests:
; 1 if the contents of the buffer pass a test, zero for failure.
; Only flags that are set upon input to the function are significant upon output.
;
; If lpi is NULL, the function uses all available tests to determine
; whether the data in the buffer is likely to be Unicode text.
;
; This parameter can be one or more of the following values. Value Meaning:
; IS_TEXT_UNICODE_ASCII16              The text is Unicode, and contains only zero-extended ASCII values/characters.
; IS_TEXT_UNICODE_REVERSE_ASCII16      Same as the preceding, except that the Unicode text is byte-reversed.
;
; IS_TEXT_UNICODE_STATISTICS           The text is probably Unicode, with the determination made by applying statistical analysis. Absolute certainty is not guaranteed. See the following Remarks section.
; IS_TEXT_UNICODE_REVERSE_STATISTICS   Same as the preceding, except that the probably-Unicode text is byte-reversed.
;
; IS_TEXT_UNICODE_CONTROLS             The text contains Unicode representations of one or more of these nonprinting characters: RETURN, LINEFEED, SPACE, CJK_SPACE, TAB.
; IS_TEXT_UNICODE_REVERSE_CONTROLS     Same as the preceding, except that the Unicode characters are byte-reversed.
;
; IS_TEXT_UNICODE_BUFFER_TOO_SMALL     There are too few characters in the buffer for meaningful analysis (fewer than two bytes).
;
; IS_TEXT_UNICODE_SIGNATURE            The text contains the Unicode byte-order mark (BOM) 0xFEFF as its first character.
; IS_TEXT_UNICODE_REVERSE_SIGNATURE    The text contains the Unicode byte-reversed byte-order mark (Reverse BOM) 0xFFFE as its first character.
;
; IS_TEXT_UNICODE_ILLEGAL_CHARS        The text contains one of these Unicode-illegal characters: embedded Reverse BOM, UNICODE_NUL, CRLF (packed into one WORD), or 0xFFFF.
; IS_TEXT_UNICODE_ODD_LENGTH           The number of characters in the string is odd. A string of odd length cannot (by definition) be Unicode text.
; IS_TEXT_UNICODE_NULL_BYTES           The text contains null bytes, which indicate non-ASCII text.
;
; IS_TEXT_UNICODE_UNICODE_MASK         This flag constant is a combination of IS_TEXT_UNICODE_ASCII16, IS_TEXT_UNICODE_STATISTICS, IS_TEXT_UNICODE_CONTROLS, IS_TEXT_UNICODE_SIGNATURE.
; IS_TEXT_UNICODE_REVERSE_MASK         This flag constant is a combination of IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS, IS_TEXT_UNICODE_REVERSE_CONTROLS, IS_TEXT_UNICODE_REVERSE_SIGNATURE.
; IS_TEXT_UNICODE_NOT_UNICODE_MASK     This flag constant is a combination of IS_TEXT_UNICODE_ILLEGAL_CHARS, IS_TEXT_UNICODE_ODD_LENGTH, and two currently unused bit flags.
; IS_TEXT_UNICODE_NOT_ASCII_MASK       This flag constant is a combination of IS_TEXT_UNICODE_NULL_BYTES and three currently unused bit flags.
;
; IS_TEXT_UNICODE_ASCII16                  1 ; 0x0001
; IS_TEXT_UNICODE_REVERSE_ASCII16         16 ; 0x0010
;
; IS_TEXT_UNICODE_STATISTICS               2 ; 0x0002
; IS_TEXT_UNICODE_REVERSE_STATISTICS      32 ; 0x0020
;
; IS_TEXT_UNICODE_CONTROLS                 4 ; 0x0004
; IS_TEXT_UNICODE_REVERSE_CONTROLS        64 ; 0x0040
;
; IS_TEXT_UNICODE_SIGNATURE                8 ; 0x0008
; IS_TEXT_UNICODE_REVERSE_SIGNATURE      128 ; 0x0080
;
; IS_TEXT_UNICODE_ILLEGAL_CHARS          256 ; 0x0100
; IS_TEXT_UNICODE_ODD_LENGTH             512 ; 0x0200
; IS_TEXT_UNICODE_DBCS_LEADBYTE         1024 ; 0x0400
; IS_TEXT_UNICODE_NULL_BYTES            4096 ; 0x1000
;
; IS_TEXT_UNICODE_UNICODE_MASK            15 ; 0x000F
; IS_TEXT_UNICODE_REVERSE_MASK           240 ; 0x00F0
; IS_TEXT_UNICODE_NOT_UNICODE_MASK      3840 ; 0x0F00
; IS_TEXT_UNICODE_NOT_ASCII_MASK       61440 ; 0xF000
;
; Return Values
; The function returns a nonzero value if the data in the buffer passes the specified tests.
; The function returns zero if the data in the buffer does not pass the specified tests.
;
; Remarks
; As noted in the preceding table of flag constants,
; the IS_TEXT_UNICODE_STATISTICS and IS_TEXT_UNICODE_REVERSE_STATISTICS tests use statistical analysis.
; These tests are not foolproof.
; The statistical tests assume certain amounts of variation between low and high bytes in a string,
; and some ASCII strings can slip through.
; For example, if lpBuffer points to the ASCII string 65, 10, 13, 29 (A\n\r^Z), ; 0x41 ; 0x0A ; 0x0D ; 0x1D
; the string passes the IS_TEXT_UNICODE_STATISTICS test, though failure would be preferable.
;
; Requirements
; Windows NT/2000/XP: Included in Windows NT 3.5 and later.
; Windows 95/98/Me: Unsupported.
;..........................................................................................................................................