|
BurgerLib
|
Conversion routines to support the UTF8 format. More...
#include <ststring.h>
Public Types | |
| enum | { BAD = -1 } |
Static Public Member Functions | |
| static Word BURGER_API | IsValidSingle (const char *pInput) |
| Check a single UTF8 byte pattern for validity. | |
| static Word BURGER_API | IsValid (const char *pInput) |
| Check a UTF8 "C" string for validity. | |
| static Word BURGER_API | IsValid (const char *pInput, WordPtr uInputSize) |
| Check a UTF8 byte array for validity. | |
| static Word BURGER_API | GetTokenSize (const char *pInput) |
| Return the number of bytes a UTF8 stream occupies. | |
| static const char *BURGER_API | NextToken (const char *pInput) |
| Return the pointer to the next UTF8 token. | |
| static Word BURGER_API | FromMacRomanUS (char *pOutput, Word uInput) |
| Convert a MacRomanUS 8 bit char into a UTF8 stream. | |
| static Word BURGER_API | FromMacRomanUS (char *pOutput, WordPtr uOutputSize, const char *pInput) |
| Convert a MacRomanUS "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromMacRomanUS (char *pOutput, WordPtr uOutputSize, const char *pInput, WordPtr uInputSize) |
| Convert a MacRomanUS byte array into a UTF8 stream. | |
| static Word BURGER_API | FromWin1252 (char *pOutput, Word uInput) |
| Convert a Win1252 8 bit char into a UTF8 stream. | |
| static Word BURGER_API | FromWin1252 (char *pOutput, WordPtr uOutputSize, const char *pInput) |
| Convert a Win1252 "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromWin1252 (char *pOutput, WordPtr uOutputSize, const char *pInput, WordPtr uInputSize) |
| Convert a Win1252 byte array into a UTF8 stream. | |
| static Word BURGER_API | FromISOLatin1 (char *pOutput, Word uInput) |
| Convert a ISOLatin1 8 bit char into a UTF8 stream. | |
| static Word BURGER_API | FromISOLatin1 (char *pOutput, WordPtr uOutputSize, const char *pInput) |
| Convert a ISOLatin1 "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromISOLatin1 (char *pOutput, WordPtr uOutputSize, const char *pInput, WordPtr uInputSize) |
| Convert a ISOLatin1 byte array into a UTF8 stream. | |
| static Word BURGER_API | FromGeneric (char *pOutput, const Word8 pTranslateTable[128][4], Word uInput) |
| Convert a generic 8 bit char into a UTF8 stream. | |
| static Word BURGER_API | FromGeneric (char *pOutput, WordPtr uOutputSize, const Word8 pTranslateTable[128][4], const char *pInput) |
| Convert a generic "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromGeneric (char *pOutput, WordPtr uOutputSize, const Word8 pTranslateTable[128][4], const char *pInput, WordPtr uInputSize) |
| Convert a generic byte array into a UTF8 stream. | |
| static Word BURGER_API | FromUTF16 (char *pOutput, Word16 uInput) |
| Convert a UTF16 char into a UTF8 stream. | |
| static Word BURGER_API | FromUTF16 (char *pOutput, WordPtr uOutputSize, const Word16 *pInput) |
| Convert a UTF16 "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromUTF16 (char *pOutput, WordPtr uOutputSize, const Word16 *pInput, WordPtr uInputSize) |
| Convert a UTF16 Word16 array into a UTF8 stream. | |
| static Word BURGER_API | FromUTF32 (char *pOutput, Word32 uInput) |
| Convert a UTF32 value into a UTF8 stream. | |
| static Word BURGER_API | FromUTF32 (char *pOutput, WordPtr uOutputSize, const Word32 *pInput) |
| Convert a UTF32 "C" string into a UTF8 stream. | |
| static Word BURGER_API | FromUTF32 (char *pOutput, WordPtr uOutputSize, const Word32 *pInput, WordPtr uInputSize) |
| Convert a UTF32 Word32 array into a UTF8 stream. | |
| static Word BURGER_API | ToGeneric (const char *pInput, const Word8 pTranslateTable[128][4]) |
| Convert a UTF8 stream into a generic 8 bit char. | |
| static Word BURGER_API | ToGeneric (char *pOutput, WordPtr uOutputSize, const Word8 pTranslateTable[128][4], const char *pInput) |
| Convert a UTF8 stream into a generic "C" string. | |
| static Word BURGER_API | ToGeneric (char *pOutput, WordPtr uOutputSize, const Word8 pTranslateTable[128][4], const char *pInput, WordPtr uInputSize) |
| Convert a UTF8 stream into a generic byte array. | |
Static Public Attributes | |
| static const Word8 | FromMacRomanUSTable [128][4] |
| Table to convert MacRomanUS to UTF8. | |
| static const Word8 | FromWin1252Table [128][4] |
| Table to convert Win1252 to UTF8. | |
| static const Word8 | FromISOLatin1Table [128][4] |
| Table to convert ISOLatin1 to UTF8. | |
| static const Word8 | TokenSizeTable [256] |
| Table to determine the size of a UTF8 token stream. | |
| static const Word8 | ByteOrderMark [3] = {0xEF,0xBB,0xBF} |
| UTF8 text file signature. | |
Conversion routines to support the UTF8 format.
UTF8 is a format that allows Unicode data to be stored in a standard "C" string with little modification to most existing string managers. All Burgerlib functions accept UTF8 strings so that they can properly present international characters in a consistent manner across numerous target platforms.
| anonymous enum |
| Word BURGER_API Burger::UTF8::FromGeneric | ( | char * | pOutput, |
| const Word8 | pTranslateTable[128][4], | ||
| Word | uInput | ||
| ) | [static] |
Convert a generic 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the generic character and convert it to a 1 to 4 byte UTF8 stream. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
| pOutput | Pointer to UTF8 buffer that's a minimum 5 bytes in size. NULL will page fault. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| uInput | Generic encoded 8 bit character |
| Word BURGER_API Burger::UTF8::FromGeneric | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word8 | pTranslateTable[128][4], | ||
| const char * | pInput | ||
| ) | [static] |
Convert a generic "C" string into a UTF8 stream.
Take a "C" string that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| pInput | A generic encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromGeneric | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word8 | pTranslateTable[128][4], | ||
| const char * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a generic byte array into a UTF8 stream.
Take a byte array that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| pInput | Generic encoded byte array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input byte array |
| Word BURGER_API Burger::UTF8::FromISOLatin1 | ( | char * | pOutput, |
| Word | uInput | ||
| ) | [static] |
Convert a ISOLatin1 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the ISOLatin1 character and convert it to a 1 or 2 byte UTF8 stream.
| pOutput | Pointer to UTF8 buffer that's a minimum 3 bytes in size. NULL will page fault. |
| uInput | ISOLatin1 encoded 8 bit character |
| Word BURGER_API Burger::UTF8::FromISOLatin1 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput | ||
| ) | [static] |
Convert a ISOLatin1 "C" string into a UTF8 stream.
Take a "C" string that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | ISOLatin1 encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromISOLatin1 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a ISOLatin1 byte array into a UTF8 stream.
Take a byte array that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | ISOLatin1 encoded byte array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input byte array |
| Word BURGER_API Burger::UTF8::FromMacRomanUS | ( | char * | pOutput, |
| Word | uInput | ||
| ) | [static] |
Convert a MacRomanUS 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the MacRomanUS character and convert it to a 1,2 or 3 byte UTF8 stream.
| pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size. NULL will page fault. |
| uInput | MacRomanUS encoded 8 bit character |
| Word BURGER_API Burger::UTF8::FromMacRomanUS | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput | ||
| ) | [static] |
Convert a MacRomanUS "C" string into a UTF8 stream.
Take a "C" string that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | MacRomanUS encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromMacRomanUS | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a MacRomanUS byte array into a UTF8 stream.
Take a byte array that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | MacRomanUS encoded byte array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input byte array |
| Word BURGER_API Burger::UTF8::FromUTF16 | ( | char * | pOutput, |
| Word16 | uInput | ||
| ) | [static] |
Convert a UTF16 char into a UTF8 stream.
Take the unsigned 16 bit value of the UTF16 character and convert it to a 1,2 or 3 byte UTF8 stream.
| pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size. NULL will page fault. |
| uInput | UTF16 encoded 16 bit character |
| Word BURGER_API Burger::UTF8::FromUTF16 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word16 * | pInput | ||
| ) | [static] |
Convert a UTF16 "C" string into a UTF8 stream.
Take a "C" string that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | UTF16 encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromUTF16 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word16 * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a UTF16 Word16 array into a UTF8 stream.
Take a Word16 array that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | UTF16 encoded Word16 array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input Word16 array in bytes |
| Word BURGER_API Burger::UTF8::FromUTF32 | ( | char * | pOutput, |
| Word32 | uInput | ||
| ) | [static] |
Convert a UTF32 value into a UTF8 stream.
Given a valid UTF32 value (0-0xD7FF / 0xE000-0x10FFFF), encode it into a valid UTF8 stream. If the value is invalid, it will NOT be encoded.
The output buffer must have at least 5 bytes available.
| pOutput | Pointer to a char buffer of a minimum of 5 bytes in size. NULL is invalid. |
| uInput | UTF32 encoded character value. |
| Word BURGER_API Burger::UTF8::FromUTF32 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word32 * | pInput | ||
| ) | [static] |
Convert a UTF32 "C" string into a UTF8 stream.
Take a "C" string that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | UTF32 encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromUTF32 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word32 * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a UTF32 Word32 array into a UTF8 stream.
Take a Word32 array that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | UTF32 encoded Word32 array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input Word32 array in bytes |
| Word BURGER_API Burger::UTF8::FromWin1252 | ( | char * | pOutput, |
| Word | uInput | ||
| ) | [static] |
Convert a Win1252 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the Win1252 character and convert it to a 1,2 or 3 byte UTF8 stream.
| pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size. NULL will page fault. |
| uInput | Win1252 encoded 8 bit character |
| Word BURGER_API Burger::UTF8::FromWin1252 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput | ||
| ) | [static] |
Convert a Win1252 "C" string into a UTF8 stream.
Take a "C" string that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | Win1252 encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::FromWin1252 | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const char * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a Win1252 byte array into a UTF8 stream.
Take a byte array that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to UTF8 buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pInput | Win1252 encoded byte array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input byte array |
| Word BURGER_API Burger::UTF8::GetTokenSize | ( | const char * | pInput | ) | [static] |
Return the number of bytes a UTF8 stream occupies.
Check the UTF8 stream and determine if it's 1-4 bytes in length. No invalid data checking is performed; call Burger::UTF8::IsValidSingle(const char *) first if the data needs to be validated.
| pInput | Pointer to UTF8 data. NULL will page fault. |
| Word BURGER_API Burger::UTF8::IsValid | ( | const char * | pInput | ) | [static] |
Check a UTF8 "C" string for validity.
Check a "C" string to see if it's a valid UTF8 stream. Return false if there was an error, or true if the bytes represent a valid UTF8 pattern.
| pInput | Pointer to a zero terminated string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::IsValid | ( | const char * | pInput, |
| WordPtr | uInputSize | ||
| ) | [static] |
Check a UTF8 byte array for validity.
Check a byte array and see if it's a valid UTF8 stream. Return false if there was an error, or true if the bytes represent a valid UTF8 pattern.
| pInput | Pointer to UTF8 data. Can be NULL if uInputSize is zero, otherwise page fault. |
| uInputSize | Length of the data in bytes, if zero, then the function will return true. |
| Word BURGER_API Burger::UTF8::IsValidSingle | ( | const char * | pInput | ) | [static] |
Check a single UTF8 byte pattern for validity.
Check the next 1 to 4 bytes to see if they comprise a valid UTF8 byte pattern and return true if they do, false if not.
Since UTF8 streams are variable length, there is no function that can take a singular value and check it for validity, you must use this function for single cases or Burger::UTF8::IsValid(const char *) for multi-character streams.
| pInput | Pointer to a stream of 1 to 4 UTF8 encoded bytes. NULL will page fault. |
| const char *BURGER_API Burger::UTF8::NextToken | ( | const char * | pInput | ) | [static] |
Return the pointer to the next UTF8 token.
Check the UTF8 stream and determine if it's 1-4 bytes in length, then return the supplied pointer incremented by that length. No invalid data checking is performed; call Burger::UTF8::IsValidSingle(const char *) first if the data needs to be validated.
| pInput | Pointer to UTF8 data. NULL will page fault. |
| Word BURGER_API Burger::UTF8::ToGeneric | ( | const char * | pInput, |
| const Word8 | pTranslateTable[128][4] | ||
| ) | [static] |
Convert a UTF8 stream into a generic 8 bit char.
Take a 1 to 4 byte UTF8 stream and look up the unsigned 8 bit value of the generic character. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.
| pInput | Pointer to UTF8 buffer that contains the valid stream to convert. NULL will page fault. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| Word BURGER_API Burger::UTF8::ToGeneric | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word8 | pTranslateTable[128][4], | ||
| const char * | pInput | ||
| ) | [static] |
Convert a UTF8 stream into a generic "C" string.
Take a "C" string that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to byte buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise it will page fault. |
| uOutputSize | Size of the output buffer in bytes. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| pInput | A UTF8 encoded "C" string. NULL will page fault. |
| Word BURGER_API Burger::UTF8::ToGeneric | ( | char * | pOutput, |
| WordPtr | uOutputSize, | ||
| const Word8 | pTranslateTable[128][4], | ||
| const char * | pInput, | ||
| WordPtr | uInputSize | ||
| ) | [static] |
Convert a UTF8 stream into a generic byte array.
Take a byte array that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be null to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
| pOutput | Pointer to a byte buffer to receive the converted string. NULL is okay if uOutputSize is zero, otherwise a page fault will occur. |
| uOutputSize | Size of the output buffer in bytes. |
| pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
| pInput | UTF8 encoded byte array. NULL is okay if uInputSize is zero. |
| uInputSize | Size of the input byte array |
const Word8 Burger::UTF8::ByteOrderMark = {0xEF,0xBB,0xBF} [static] |
UTF8 text file signature.
If a raw text file starts with this three byte pattern, you're supposed to assume that all of the text that follows is encoded with UTF8.
Note: An explanation is found here at Unicode.org
const Word8 Burger::UTF8::FromISOLatin1Table [static] |
const Word8 Burger::UTF8::FromMacRomanUSTable [static] |
Table to convert MacRomanUS to UTF8.
This 128x4 array holds the 128 high ascii codes for MacRomanUS converted to UTF8. All 128 codes are present in Unicode except for code 0xF0 which is the closed Apple symbol.
The character map for MacRomanUS looks like this. Click here.
const Word8 Burger::UTF8::FromWin1252Table [static] |
Table to convert Win1252 to UTF8.
This 128x4 array holds the 128 high ascii codes for Win1252 converted to UTF8. Some unprintable codes don't exist and are marked as themselves for the pattern. These are codes 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
The character map for Win1252 looks like this. Click here.
const Word8 Burger::UTF8::TokenSizeTable [static] |
{
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,0x02,
0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03,
0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04,0x04
}
Table to determine the size of a UTF8 token stream.
Using the first byte as an index, obtain the size of the stream in bytes from this table. Entries are the numbers 1-4. This table shouldn't be used for error checking and it's only for quick look ups on valid UTF8 streams.
1.8.0