Conversion routines to support the UTF8 format. More...
Static Public Member Functions | |
static uint_t | is_valid_single (const char *pInput) noexcept |
Check a single UTF8 byte pattern for validity. | |
static uint_t | is_valid (const char *pInput) noexcept |
Check a UTF8 "C" string for validity. | |
static uint_t | is_valid (const char *pInput, uintptr_t uInputSize) noexcept |
Check a UTF8 byte array for validity. | |
static uintptr_t | GetTokenSize (const char *pInput) noexcept |
Return the number of bytes a UTF8 stream occupies. | |
static const char * | NextToken (const char *pInput) noexcept |
Return the pointer to the next UTF8 token. | |
static uintptr_t | GetMacRomanUSSize (uint_t uInput) noexcept |
Determine the size of the UTF8 stream for a MacRomanUS char. | |
static uintptr_t | GetMacRomanUSSize (const char *pInput) noexcept |
Determine the size of the UTF8 stream for a MacRomanUS "C" string. | |
static uintptr_t | GetMacRomanUSSize (const char *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a MacRomanUS buffer. | |
static uintptr_t | FromMacRomanUS (char *pOutput, uint_t uInput) noexcept |
Convert a MacRomanUS 8 bit char into a UTF8 stream. | |
static uintptr_t | FromMacRomanUS (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept |
Convert a MacRomanUS "C" string into a UTF8 stream. | |
static uintptr_t | FromMacRomanUS (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept |
Convert a MacRomanUS byte array into a UTF8 stream. | |
static uintptr_t | GetWin1252Size (uint_t uInput) noexcept |
Determine the size of the UTF8 stream for a Win1252 char. | |
static uintptr_t | GetWin1252Size (const char *pInput) noexcept |
Determine the size of the UTF8 stream for a Win1252 "C" string. | |
static uintptr_t | GetWin1252Size (const char *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a Win1252 buffer. | |
static uintptr_t | FromWin1252 (char *pOutput, uint_t uInput) noexcept |
Convert a Win1252 8 bit char into a UTF8 stream. | |
static uintptr_t | FromWin1252 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept |
Convert a Win1252 "C" string into a UTF8 stream. | |
static uintptr_t | FromWin1252 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept |
Convert a Win1252 byte array into a UTF8 stream. | |
static uintptr_t | GetWin437Size (uint_t uInput) noexcept |
Determine the size of the UTF8 stream for a Win437 char. | |
static uintptr_t | GetWin437Size (const char *pInput) noexcept |
Determine the size of the UTF8 stream for a Win437 "C" string. | |
static uintptr_t | GetWin437Size (const char *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a Win437 buffer. | |
static uintptr_t | FromWin437 (char *pOutput, uint_t uInput) noexcept |
Convert a Win437 8 bit char into a UTF8 stream. | |
static uintptr_t | FromWin437 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept |
Convert a Win437 "C" string into a UTF8 stream. | |
static uintptr_t | FromWin437 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept |
Convert a Win437 byte array into a UTF8 stream. | |
static uintptr_t | GetISOLatin1Size (uint_t uInput) noexcept |
Determine the size of the UTF8 stream for a ISOLatin1 char. | |
static uintptr_t | GetISOLatin1Size (const char *pInput) noexcept |
Determine the size of the UTF8 stream for a ISOLatin1 "C" string. | |
static uintptr_t | GetISOLatin1Size (const char *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a ISOLatin1 buffer. | |
static uintptr_t | FromISOLatin1 (char *pOutput, uint_t uInput) noexcept |
Convert a ISOLatin1 8 bit char into a UTF8 stream. | |
static uintptr_t | FromISOLatin1 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept |
Convert a ISOLatin1 "C" string into a UTF8 stream. | |
static uintptr_t | FromISOLatin1 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept |
Convert a ISOLatin1 byte array into a UTF8 stream. | |
static uintptr_t | GetGenericSize (const uint8_t pTranslateTable[128][4], uint_t uInput) noexcept |
Determine the size of the UTF8 stream from a char. | |
static uintptr_t | GetGenericSize (const uint8_t pTranslateTable[128][4], const char *pInput) noexcept |
Determine the size of the UTF8 stream for a generic "C" string. | |
static uintptr_t | GetGenericSize (const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a generic buffer. | |
static uintptr_t | FromGeneric (char *pOutput, const uint8_t pTranslateTable[128][4], uint_t uInput) noexcept |
Convert a generic 8 bit char into a UTF8 stream. | |
static uintptr_t | FromGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput) noexcept |
Convert a generic "C" string into a UTF8 stream. | |
static uintptr_t | FromGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept |
Convert a generic byte array into a UTF8 stream. | |
static uintptr_t | GetUTF16Size (uint_t uInput) noexcept |
Determine the size of the UTF8 stream for a UTF16 value. | |
static uintptr_t | GetUTF16Size (const uint16_t *pInput) noexcept |
Determine the size of the UTF8 stream for a UTF16 "C" string. | |
static uintptr_t | GetUTF16Size (const uint16_t *pInput, uintptr_t uInputSize) noexcept |
Determine the size of the UTF8 stream for a UTF16 buffer. | |
static uintptr_t | from_UTF16 (char *pOutput, uint16_t uInput) noexcept |
Convert a UTF16 char into a UTF8 stream. | |
static uintptr_t | from_UTF16 (char *pOutput, uintptr_t uOutputSize, const uint16_t *pInput) noexcept |
Convert a UTF16 "C" string into a UTF8 stream. | |
static uintptr_t | from_UTF16 (char *pOutput, uintptr_t uOutputSize, const uint16_t *pInput, uintptr_t uInputSize) noexcept |
Convert a UTF16 uint16_t array into a UTF8 stream. | |
static char * | from_UTF16 (const uint16_t *pInput) noexcept |
Convert a UTF16 "C" string into an allocated UTF8 "C" string. | |
static uintptr_t | from_UTF32 (char *pOutput, uint32_t uInput) noexcept |
Convert a UTF32 value into a UTF8 stream. | |
static uintptr_t | from_UTF32 (char *pOutput, uintptr_t uOutputSize, const uint32_t *pInput) noexcept |
Convert a UTF32 "C" string into a UTF8 stream. | |
static uintptr_t | from_UTF32 (char *pOutput, uintptr_t uOutputSize, const uint32_t *pInput, uintptr_t uInputSize) noexcept |
Convert a UTF32 uint32_t array into a UTF8 stream. | |
static char * | from_UTF32 (const uint32_t *pInput) noexcept |
Convert a UTF32 "C" string into an allocated UTF8 "C" string. | |
static uint_t | ToGeneric (const char *pInput, const uint8_t pTranslateTable[128][4]) noexcept |
Convert a UTF8 stream into a generic 8 bit char. | |
static uintptr_t | ToGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput) noexcept |
Convert a UTF8 stream into a generic "C" string. | |
static uintptr_t | ToGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept |
Convert a UTF8 stream into a generic byte array. | |
Static Public Attributes | |
static const uint_t | kInvalid = UINT32_MAX |
Value returned if a routine failed. | |
static const uint8_t | TokenSizeTable [256] |
Table to determine the size of a UTF8 token stream. | |
static const uint8_t | ByteOrderMark [3] = {0xEF, 0xBB, 0xBF} |
UTF8 text file signature. | |
Conversion routines to support the UTF8 format.
UTF8 is a format that allows Unicode data to be stored in a standard "C" string with little modification to most existing string managers. All Burgerlib functions accept UTF8 strings so that they can properly present international characters in a consistent manner across numerous target platforms.
|
staticnoexcept |
Convert a UTF16 char into a UTF8 stream.
Take the unsigned 16 bit value of the UTF16 character and convert it to a 1,2 or 3 byte UTF8 stream.
pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size, nullptr will page fault. |
uInput | UTF16 encoded 16 bit character |
|
staticnoexcept |
Convert a UTF16 "C" string into a UTF8 stream.
Take a "C" string that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be NULL to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | UTF16 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a UTF16 uint16_t array into a UTF8 stream.
Take a uint16_t array that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | UTF16 encoded uint16_t array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input uint16_t array in elements |
|
staticnoexcept |
Convert a UTF16 "C" string into an allocated UTF8 "C" string.
Take a "C" string that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will allocate a buffer large enough to store the string. When the string isn't needed anymore, release the memory with a call to Burger::Free(const void *)
UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.
pInput | UTF16 encoded "C" string, nullptr will page fault. |
Returns nullptr if the memory allocation fails.
|
staticnoexcept |
Convert a UTF32 value into a UTF8 stream.
Given a valid UTF32 value (0-0xD7FF / 0xE000-0x10FFFF), encode it into a valid UTF8 stream. If the value is invalid, it will NOT be encoded.
The output buffer must have at least 5 bytes available.
pOutput | Pointer to a char buffer of a minimum of 5 bytes in size, nullptr is invalid. |
uInput | UTF32 encoded character value. |
|
staticnoexcept |
Convert a UTF32 "C" string into a UTF8 stream.
Take a "C" string that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | UTF32 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a UTF32 uint32_t array into a UTF8 stream.
Take a uint32_t array that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | UTF32 encoded uint32_t array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input uint32_t array in bytes |
|
staticnoexcept |
Convert a UTF32 "C" string into an allocated UTF8 "C" string.
Take a "C" string that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will allocate a buffer large enough to store the string. When the string isn't needed anymore, release the memory with a call to Burger::Free(const void *)
pInput | UTF32 encoded "C" string, nullptr will page fault. |
Returns nullptr if the memory allocation fails.
|
staticnoexcept |
Convert a generic 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the generic character and convert it to a 1 to 4 byte UTF8 stream. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
pOutput | Pointer to UTF8 buffer that's a minimum 5 bytes in size, nullptr will page fault. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
uInput | Generic encoded 8 bit character |
|
staticnoexcept |
Convert a generic "C" string into a UTF8 stream.
Take a "C" string that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | A generic encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a generic byte array into a UTF8 stream.
Take a byte array that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | Generic encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
|
staticnoexcept |
Convert a ISOLatin1 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the ISOLatin1 character and convert it to a 1 or 2 byte UTF8 stream.
pOutput | Pointer to UTF8 buffer that's a minimum 3 bytes in size, nullptr will page fault. |
uInput | ISOLatin1 encoded 8 bit character |
|
staticnoexcept |
Convert a ISOLatin1 "C" string into a UTF8 stream.
Take a "C" string that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | ISOLatin1 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a ISOLatin1 byte array into a UTF8 stream.
Take a byte array that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | ISOLatin1 encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
|
staticnoexcept |
Convert a MacRomanUS 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the MacRomanUS character and convert it to a 1, 2 or 3 byte UTF8 stream. Only the UTF8 characters are written.
pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size, nullptr will page fault. |
uInput | MacRomanUS encoded 8 bit character |
|
staticnoexcept |
Convert a MacRomanUS "C" string into a UTF8 stream.
Take a "C" string that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | MacRomanUS encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a MacRomanUS byte array into a UTF8 stream.
Take a byte array that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | MacRomanUS encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
|
staticnoexcept |
Convert a Win1252 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the Win1252 character and convert it to a 1, 2 or 3 byte UTF8 stream.
pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size, nullptr will page fault. |
uInput | Win1252 encoded 8 bit character |
|
staticnoexcept |
Convert a Win1252 "C" string into a UTF8 stream.
Take a "C" string that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | Win1252 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a Win1252 byte array into a UTF8 stream.
Take a byte array that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | Win1252 encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
|
staticnoexcept |
Convert a Win437 8 bit char into a UTF8 stream.
Take the unsigned 8 bit value of the Win437 character and convert it to a 1,2 or 3 byte UTF8 stream.
pOutput | Pointer to UTF8 buffer that's a minimum 4 bytes in size, nullptr will page fault. |
uInput | Win437 encoded 8 bit character |
|
staticnoexcept |
Convert a Win437 "C" string into a UTF8 stream.
Take a "C" string that is using Win437 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pInput | Win437 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a Win437 byte array into a UTF8 stream.
Take a byte array that is using Win437 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to UTF8 buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pInput | Win437 encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
|
staticnoexcept |
Determine the size of the UTF8 stream for a generic "C" string.
Take a "C" string, encoded with the generic 8 bit encoding described by the translation table, and determine the length in bytes this string would require if encoded in UTF8.
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | Pointer to a "C" string encoded in the generic encoding |
|
staticnoexcept |
Determine the size of the UTF8 stream for a generic buffer.
Take a buffer, encoded with the generic 8 bit encoding described by the translation table, and determine the length in bytes this buffer would require if encoded in UTF8.
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | Pointer to a buffer encoded in the generic encoding |
uInputSize | Number of bytes in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream from a char.
Take the unsigned 8 bit value of a character, look it up in the supplied translation table and return the number of UTF8 bytes it will occupy. The answer is either 1, 2, 3 or 4 bytes. If the input is greater than 0xFF, the returned value is zero.
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
uInput | Table encoded 8 bit character |
|
staticnoexcept |
Determine the size of the UTF8 stream for a ISOLatin1 "C" string.
Take a "C" string, encoded with ISOLatin1, and determine the length in bytes this string would require if encoded in UTF8.
pInput | Pointer to a "C" string encoded in ISOLatin1 |
|
staticnoexcept |
Determine the size of the UTF8 stream for a ISOLatin1 buffer.
Take a buffer, encoded with ISOLatin1, and determine the length in bytes this buffer would require if encoded in UTF8.
pInput | Pointer to a buffer encoded in ISOLatin1 |
uInputSize | Number of bytes in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream for a ISOLatin1 char.
Take the unsigned 8 bit value of the ISOLatin1 character and return the number of UTF8 bytes it will occupy. The answer is either 1 or 2 bytes. If the input is greater than 0xFF, the returned value is zero.
uInput | ISOLatin1 encoded 8 bit character |
|
staticnoexcept |
Determine the size of the UTF8 stream for a MacRomanUS "C" string.
Take a "C" string, encoded with MacRomanUS, and determine the length in bytes this string would require if encoded in UTF8.
pInput | Pointer to a "C" string encoded in MacRomanUS |
|
staticnoexcept |
Determine the size of the UTF8 stream for a MacRomanUS buffer.
Take a buffer, encoded with MacRomanUS, and determine the length in bytes this buffer would require if encoded in UTF8.
pInput | Pointer to a buffer encoded in MacRomanUS |
uInputSize | Number of bytes in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream for a MacRomanUS char.
Take the unsigned 8 bit value of the MacRomanUS character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.
uInput | MacRomanUS encoded 8 bit character |
|
staticnoexcept |
Return the number of bytes a UTF8 stream occupies.
Check the UTF8 stream and determine if it's 1-4 bytes in length. No invalid data checking is performed. If validation is needed, use Burger::UTF8::is_valid_single(const char *) instead.
pInput | Pointer to UTF8 data, nullptr will page fault. |
|
staticnoexcept |
Determine the size of the UTF8 stream for a UTF16 "C" string.
Take a "C" string, encoded with UTF16, and determine the length in bytes this string would require if encoded in UTF8.
pInput | Pointer to a "C" string encoded in UTF16 |
|
staticnoexcept |
Determine the size of the UTF8 stream for a UTF16 buffer.
Take a buffer, encoded with UTF16, and determine the length in bytes this buffer would require if encoded in UTF8.
pInput | Pointer to a buffer encoded in UTF16 |
uInputSize | Number of elements in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream for a UTF16 value.
Take the unsigned 16 bit value of the UTF16 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.
uInput | UTF16 encoded 16 bit character |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win1252 "C" string.
Take a "C" string, encoded with Win1252, and determine the length in bytes this string would require if encoded in UTF8.
pInput | Pointer to a "C" string encoded in Win1252 |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win1252 buffer.
Take a buffer, encoded with Win1252, and determine the length in bytes this buffer would require if encoded in UTF8.
pInput | Pointer to a buffer encoded in Win1252 |
uInputSize | Number of bytes in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win1252 char.
Take the unsigned 8 bit value of the Win1252 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.
uInput | Win1252 encoded 8 bit character |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win437 "C" string.
Take a "C" string, encoded with Win437, and determine the length in bytes this string would require if encoded in UTF8.
pInput | Pointer to a "C" string encoded in Win437 |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win437 buffer.
Take a buffer, encoded with Win437, and determine the length in bytes this buffer would require if encoded in UTF8.
pInput | Pointer to a buffer encoded in Win437 |
uInputSize | Number of bytes in the buffer |
|
staticnoexcept |
Determine the size of the UTF8 stream for a Win437 char.
Take the unsigned 8 bit value of the Win437 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.
uInput | Win437 encoded 8 bit character |
|
staticnoexcept |
|
staticnoexcept |
Check a UTF8 byte array for validity.
Check a byte array and see if it's a valid UTF8 stream. Return FALSE if there was an error, or TRUE if the bytes represent a valid UTF8 pattern.
|
staticnoexcept |
Check a single UTF8 byte pattern for validity.
Check the next 1 to 4 bytes to see if it comprises a valid UTF8 byte pattern and return true if they are, false, if not.
Since UTF8 streams are variable length, there is no function that can take a singular value and check it for validity, you must use this function for single cases or Burger::UTF8::is_valid(const char *) for multi-character streams.
pInput | Pointer to a stream of 1 to 4 UTF8 encoded bytes, nullptr will page fault. |
|
staticnoexcept |
Return the pointer to the next UTF8 token.
Check the UTF8 stream and determine if it's 1-4 bytes in length, then return the supplied pointer incremented by that length. No invalid data checking is performed. If validation is needed, use Burger::UTF8::is_valid_single(const char *) instead.
pInput | Pointer to UTF8 data, nullptr will page fault. |
|
staticnoexcept |
Convert a UTF8 stream into a generic "C" string.
Take a "C" string that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to byte buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise it will page fault. |
uOutputSize | Size of the output buffer in bytes. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | A UTF8 encoded "C" string, nullptr will page fault. |
|
staticnoexcept |
Convert a UTF8 stream into a generic byte array.
Take a byte array that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr
to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.
pOutput | Pointer to a byte buffer to receive the converted string, nullptr is okay if uOutputSize is zero, otherwise a page fault will occur. |
uOutputSize | Size of the output buffer in bytes. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
pInput | UTF8 encoded byte array, nullptr is okay if uInputSize is zero. |
uInputSize | Size of the input byte array |
Returns the byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.
|
staticnoexcept |
Convert a UTF8 stream into a generic 8 bit char.
Take a 1 to 4 byte UTF8 stream and look up the unsigned 8 bit value of the generic character. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.
pInput | Pointer to UTF8 buffer that contains the valid stream to convert, nullptr will page fault. |
pTranslateTable | Pointer to a 128x4 array to use as a UTF8 conversion table. |
|
static |
UTF8 text file signature.
If a raw text file starts with this three byte pattern, you're supposed to assume that all of the text that follows is encoded with UTF8.
Note: An explanation is found here at Unicode.org
|
static |
|
static |