Conversion routines to support the UTF8 format. More...

Static Public Member Functions
static uint_t	IsValidSingle (const char *pInput) noexcept
	Check a single UTF8 byte pattern for validity.

static uint_t	IsValid (const char *pInput) noexcept
	Check a UTF8 "C" string for validity.

static uint_t	IsValid (const char *pInput, uintptr_t uInputSize) noexcept
	Check a UTF8 byte array for validity.

static uintptr_t	GetTokenSize (const char *pInput) noexcept
	Return the number of bytes a UTF8 stream occupies.

static const char *	NextToken (const char *pInput) noexcept
	Return the pointer to the next UTF8 token.

static uintptr_t	GetMacRomanUSSize (uint_t uInput) noexcept
	Determine the size of the UTF8 stream for a MacRomanUS char.

static uintptr_t	GetMacRomanUSSize (const char *pInput) noexcept
	Determine the size of the UTF8 stream for a MacRomanUS "C" string.

static uintptr_t	GetMacRomanUSSize (const char *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for a MacRomanUS buffer.

static uintptr_t	FromMacRomanUS (char *pOutput, uint_t uInput) noexcept
	Convert a MacRomanUS 8 bit char into a UTF8 stream.

static uintptr_t	FromMacRomanUS (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept
	Convert a MacRomanUS "C" string into a UTF8 stream.

static uintptr_t	FromMacRomanUS (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept
	Convert a MacRomanUS byte array into a UTF8 stream.

static uintptr_t	GetWin1252Size (uint_t uInput) noexcept
	Determine the size of the UTF8 stream for a Win1252 char.

static uintptr_t	GetWin1252Size (const char *pInput) noexcept
	Determine the size of the UTF8 stream for a Win1252 "C" string.

static uintptr_t	GetWin1252Size (const char *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for a Win1252 buffer.

static uintptr_t	FromWin1252 (char *pOutput, uint_t uInput) noexcept
	Convert a Win1252 8 bit char into a UTF8 stream.

static uintptr_t	FromWin1252 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept
	Convert a Win1252 "C" string into a UTF8 stream.

static uintptr_t	FromWin1252 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept
	Convert a Win1252 byte array into a UTF8 stream.

static uintptr_t	GetWin437Size (uint_t uInput) noexcept
	Determine the size of the UTF8 stream for a Win437 char.

static uintptr_t	GetWin437Size (const char *pInput) noexcept
	Determine the size of the UTF8 stream for a Win437 "C" string.

static uintptr_t	GetWin437Size (const char *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for a Win437 buffer.

static uintptr_t	FromWin437 (char *pOutput, uint_t uInput) noexcept
	Convert a Win437 8 bit char into a UTF8 stream.

static uintptr_t	FromWin437 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept
	Convert a Win437 "C" string into a UTF8 stream.

static uintptr_t	FromWin437 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept
	Convert a Win437 byte array into a UTF8 stream.

static uintptr_t	GetISOLatin1Size (uint_t uInput) noexcept
	Determine the size of the UTF8 stream for an ISOLatin1 char.

static uintptr_t	GetISOLatin1Size (const char *pInput) noexcept
	Determine the size of the UTF8 stream for an ISOLatin1 "C" string.

static uintptr_t	GetISOLatin1Size (const char *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for an ISOLatin1 buffer.

static uintptr_t	FromISOLatin1 (char *pOutput, uint_t uInput) noexcept
	Convert an ISOLatin1 8 bit char into a UTF8 stream.

static uintptr_t	FromISOLatin1 (char *pOutput, uintptr_t uOutputSize, const char *pInput) noexcept
	Convert an ISOLatin1 "C" string into a UTF8 stream.

static uintptr_t	FromISOLatin1 (char *pOutput, uintptr_t uOutputSize, const char *pInput, uintptr_t uInputSize) noexcept
	Convert an ISOLatin1 byte array into a UTF8 stream.

static uintptr_t	GetGenericSize (const uint8_t pTranslateTable[128][4], uint_t uInput) noexcept
	Determine the size of the UTF8 stream from a char.

static uintptr_t	GetGenericSize (const uint8_t pTranslateTable[128][4], const char *pInput) noexcept
	Determine the size of the UTF8 stream for a generic "C" string.

static uintptr_t	GetGenericSize (const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for a generic buffer.

static uintptr_t	FromGeneric (char *pOutput, const uint8_t pTranslateTable[128][4], uint_t uInput) noexcept
	Convert a generic 8 bit char into a UTF8 stream.

static uintptr_t	FromGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput) noexcept
	Convert a generic "C" string into a UTF8 stream.

static uintptr_t	FromGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept
	Convert a generic byte array into a UTF8 stream.

static uintptr_t	GetUTF16Size (uint_t uInput) noexcept
	Determine the size of the UTF8 stream for a UTF16 value.

static uintptr_t	GetUTF16Size (const uint16_t *pInput) noexcept
	Determine the size of the UTF8 stream for a UTF16 "C" string.

static uintptr_t	GetUTF16Size (const uint16_t *pInput, uintptr_t uInputSize) noexcept
	Determine the size of the UTF8 stream for a UTF16 buffer.

static uintptr_t	FromUTF16 (char *pOutput, uint16_t uInput) noexcept
	Convert a UTF16 char into a UTF8 stream.

static uintptr_t	FromUTF16 (char *pOutput, uintptr_t uOutputSize, const uint16_t *pInput) noexcept
	Convert a UTF16 "C" string into a UTF8 stream.

static uintptr_t	FromUTF16 (char *pOutput, uintptr_t uOutputSize, const uint16_t *pInput, uintptr_t uInputSize) noexcept
	Convert a UTF16 uint16_t array into a UTF8 stream.

static char *	FromUTF16 (const uint16_t *pInput) noexcept
	Convert a UTF16 "C" string into an allocated UTF8 "C" string.

static uintptr_t	FromUTF32 (char *pOutput, uint32_t uInput) noexcept
	Convert a UTF32 value into a UTF8 stream.

static uintptr_t	FromUTF32 (char *pOutput, uintptr_t uOutputSize, const uint32_t *pInput) noexcept
	Convert a UTF32 "C" string into a UTF8 stream.

static uintptr_t	FromUTF32 (char *pOutput, uintptr_t uOutputSize, const uint32_t *pInput, uintptr_t uInputSize) noexcept
	Convert a UTF32 uint32_t array into a UTF8 stream.

static char *	FromUTF32 (const uint32_t *pInput) noexcept
	Convert a UTF32 "C" string into an allocated UTF8 "C" string.

static uint_t	ToGeneric (const char *pInput, const uint8_t pTranslateTable[128][4]) noexcept
	Convert a UTF8 stream into a generic 8 bit char.

static uintptr_t	ToGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput) noexcept
	Convert a UTF8 stream into a generic "C" string.

static uintptr_t	ToGeneric (char *pOutput, uintptr_t uOutputSize, const uint8_t pTranslateTable[128][4], const char *pInput, uintptr_t uInputSize) noexcept
	Convert a UTF8 stream into a generic byte array.

Static Public Attributes
static const uint_t	kInvalid = UINT32_MAX
	Value returned if a routine failed.

static const uint8_t	TokenSizeTable [256]
	Table to determine the size of a UTF8 token stream.

static const uint8_t	ByteOrderMark [3] = {0xEF, 0xBB, 0xBF}
	UTF8 text file signature.

Detailed Description

Conversion routines to support the UTF8 format.

UTF8 is a format that allows Unicode data to be stored in a standard "C" string with little modification to most existing string managers. All Burgerlib functions accept UTF8 strings so that they can properly present international characters in a consistent manner across numerous target platforms.

Member Function Documentation

◆ FromGeneric() [1/3]

uintptr_t BURGER_API Burger::UTF8::FromGeneric	(	char *	pOutput,
		const uint8_t	pTranslateTable[128][4],
		uint_t	uInput )

static noexcept

Convert a generic 8 bit char into a UTF8 stream.

Take the unsigned 8 bit value of the generic character and convert it to a 1 to 4 byte UTF8 stream. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.

The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.

Note: This function will write a zero after the stream so you can assume that it's a valid "C" string.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 5 bytes in size, `nullptr` will page fault.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
uInput	Generic encoded 8 bit character

Returns: The number of bytes written to the stream. 1, 2, 3, 4 or 0 if uInput is >=256.

◆ FromGeneric() [2/3]

uintptr_t BURGER_API Burger::UTF8::FromGeneric	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint8_t	pTranslateTable[128][4],
		const char *	pInput )

static noexcept

Convert a generic "C" string into a UTF8 stream.

Take a "C" string that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	A generic encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromGeneric() [3/3]

uintptr_t BURGER_API Burger::UTF8::FromGeneric	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint8_t	pTranslateTable[128][4],
		const char *	pInput,
		uintptr_t	uInputSize )

static noexcept

Convert a generic byte array into a UTF8 stream.

Take a byte array that is using generic encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

The user supplied table must contain valid UTF8 byte patterns. This routine will not perform validation on the contents of the table and if the table has bad data, the UTF8 stream produced by this function will be error prone.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	Generic encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromISOLatin1() [1/3]

uintptr_t BURGER_API Burger::UTF8::FromISOLatin1	(	char *	pOutput,
		uint_t	uInput )

static noexcept

Convert an ISOLatin1 8 bit char into a UTF8 stream.

Take the unsigned 8 bit value of the ISOLatin1 character and convert it to a 1 or 2 byte UTF8 stream.

Note: This function will write a zero after the stream so you can assume that it's a valid "C" string.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 3 bytes in size, `nullptr` will page fault.
uInput	ISOLatin1 encoded 8 bit character

Returns: The number of bytes written to the stream. 1, 2 or 0 if uInput is >=256.

◆ FromISOLatin1() [2/3]

uintptr_t BURGER_API Burger::UTF8::FromISOLatin1	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput )

static noexcept

Convert an ISOLatin1 "C" string into a UTF8 stream.

Take a "C" string that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	ISOLatin1 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromISOLatin1() [3/3]

uintptr_t BURGER_API Burger::UTF8::FromISOLatin1	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput,
		uintptr_t	uInputSize )

static noexcept

Convert an ISOLatin1 byte array into a UTF8 stream.

Take a byte array that is using ISOLatin1 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	ISOLatin1 encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromMacRomanUS() [1/3]

uintptr_t BURGER_API Burger::UTF8::FromMacRomanUS	(	char *	pOutput,
		uint_t	uInput )

static noexcept

Convert a MacRomanUS 8 bit char into a UTF8 stream.

Take the unsigned 8 bit value of the MacRomanUS character and convert it to a 1, 2 or 3 byte UTF8 stream. Only the UTF8 characters are written.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 4 bytes in size, `nullptr` will page fault.
uInput	MacRomanUS encoded 8 bit character

Returns: The number of bytes written to the stream. 1, 2, 3 or 0 if uInput is >=256.

◆ FromMacRomanUS() [2/3]

uintptr_t BURGER_API Burger::UTF8::FromMacRomanUS	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput )

static noexcept

Convert a MacRomanUS "C" string into a UTF8 stream.

Take a "C" string that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	MacRomanUS encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromMacRomanUS() [3/3]

uintptr_t BURGER_API Burger::UTF8::FromMacRomanUS	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput,
		uintptr_t	uInputSize )

static noexcept

Convert a MacRomanUS byte array into a UTF8 stream.

Take a byte array that is using MacRomanUS encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	MacRomanUS encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromUTF16() [1/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF16	(	char *	pOutput,
		uint16_t	uInput )

static noexcept

Convert a UTF16 char into a UTF8 stream.

Take the unsigned 16 bit value of the UTF16 character and convert it to a 1, 2 or 3 byte UTF8 stream.

Note: This will NOT parse word pairs. It will return a 0 and not process the value if it's 0xD800-0xDFFF which is an escape token for UTF16.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 4 bytes in size, `nullptr` will page fault.
uInput	UTF16 encoded 16 bit character

Returns: The number of bytes written to the stream. 1, 2, 3 or 0 if uInput is >=0xD800 and <=0xDFFF.

See also: FromUTF16(char *,uintptr_t, const uint16_t *)

◆ FromUTF16() [2/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF16	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint16_t *	pInput )

static noexcept

Convert a UTF16 "C" string into a UTF8 stream.

Take a "C" string that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. If invalid UTF16 surrogate pair(s) are found, they will be skipped.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	UTF16 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromUTF16() [3/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF16	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint16_t *	pInput,
		uintptr_t	uInputSize )

static noexcept

Convert a UTF16 uint16_t array into a UTF8 stream.

Take a uint16_t array that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	UTF16 encoded uint16_t array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input uint16_t array in elements

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromUTF16() [4/4]

char *BURGER_API Burger::UTF8::FromUTF16 ( const uint16_t * pInput )

static noexcept

Convert a UTF16 "C" string into an allocated UTF8 "C" string.

Take a "C" string that is using UTF16 encoding and convert it to a UTF8 encoded "C" string. The function will allocate a buffer large enough to store the string. When the string isn't needed anymore, release the memory with a call to Burger::Free(const void *).

UTF16 surrogate pairs will be properly parsed and encoded into their UTF8 equivalents.

Note: If invalid UTF16 surrogate pair(s) are found, they will be skipped.

Parameters

pInput UTF16 encoded "C" string, nullptr will page fault.

Returns: A valid pointer to a UTF-8 version of the "C" string. nullptr if the memory allocation failed.

See also: Burger::Free(const void *)

◆ FromUTF32() [1/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF32	(	char *	pOutput,
		uint32_t	uInput )

static noexcept

Convert a UTF32 value into a UTF8 stream.

Given a valid UTF32 value (0-0xD7FF / 0xE000-0x10FFFF), encode it into a valid UTF8 stream. If the value is invalid, it will NOT be encoded.

The output buffer must have at least 5 bytes available.

Note: The function will zero terminate the stream. The zero isn't counted in the data length.

Parameters

pOutput	Pointer to a char buffer of a minimum of 5 bytes in size, `nullptr` is invalid.
uInput	UTF32 encoded character value.

Returns: Number of bytes used to store the UTF8 stream, zero if the value can't be encoded.

◆ FromUTF32() [2/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF32	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint32_t *	pInput )

static noexcept

Convert a UTF32 "C" string into a UTF8 stream.

Take a "C" string that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. If invalid UTF32 values are found, they will be skipped.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	UTF32 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromUTF32() [3/4]

uintptr_t BURGER_API Burger::UTF8::FromUTF32	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint32_t *	pInput,
		uintptr_t	uInputSize )

static noexcept

Convert a UTF32 uint32_t array into a UTF8 stream.

Take a uint32_t array that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	UTF32 encoded uint32_t array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input uint32_t array in bytes

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromUTF32() [4/4]

char *BURGER_API Burger::UTF8::FromUTF32 ( const uint32_t * pInput )

staticnoexcept

Convert a UTF32 "C" string into an allocated UTF8 "C" string.

Take a "C" string that is using UTF32 encoding and convert it to a UTF8 encoded "C" string. The function will allocate a buffer large enough to store the string. When the string isn't needed anymore, release the memory with a call to Burger::Free(const void *).

Note: If invalid UTF32 codes are found, they will be skipped.

Parameters

pInput UTF32 encoded "C" string, nullptr will page fault.

Returns: A valid pointer to a UTF-8 version of the "C" string, or nullptr if memory allocation failed.

See also: Burger::Free(const void *)

◆ FromWin1252() [1/3]

uintptr_t BURGER_API Burger::UTF8::FromWin1252	(	char *	pOutput,
		uint_t	uInput )

staticnoexcept

Convert a Win1252 8 bit char into a UTF8 stream.

Take the unsigned 8 bit value of the Win1252 character and convert it to a 1, 2 or 3 byte UTF8 stream.

Note: This function will write a zero after the stream so you can assume that it's a valid "C" string.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 4 bytes in size, `nullptr` will page fault.
uInput	Win1252 encoded 8 bit character

Returns: The number of bytes written to the stream. 1, 2, 3 or 0 if uInput is >=256.

◆ FromWin1252() [2/3]

uintptr_t BURGER_API Burger::UTF8::FromWin1252	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput )

staticnoexcept

Convert a Win1252 "C" string into a UTF8 stream.

Take a "C" string that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	Win1252 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromWin1252() [3/3]

uintptr_t BURGER_API Burger::UTF8::FromWin1252	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Convert a Win1252 byte array into a UTF8 stream.

Take a byte array that is using Win1252 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	Win1252 encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromWin437() [1/3]

uintptr_t BURGER_API Burger::UTF8::FromWin437	(	char *	pOutput,
		uint_t	uInput )

staticnoexcept

Convert a Win437 8 bit char into a UTF8 stream.

Take the unsigned 8 bit value of the Win437 character and convert it to a 1, 2 or 3 byte UTF8 stream.

Note: This function will write a zero after the stream so you can assume that it's a valid "C" string.

Parameters

pOutput	Pointer to UTF8 buffer that's a minimum 4 bytes in size, `nullptr` will page fault.
uInput	Win437 encoded 8 bit character

Returns: The number of bytes written to the stream. 1, 2, 3 or 0 if uInput is >=256.

◆ FromWin437() [2/3]

uintptr_t BURGER_API Burger::UTF8::FromWin437	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput )

staticnoexcept

Convert a Win437 "C" string into a UTF8 stream.

Take a "C" string that is using Win437 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pInput	Win437 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ FromWin437() [3/3]

uintptr_t BURGER_API Burger::UTF8::FromWin437	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Convert a Win437 byte array into a UTF8 stream.

Take a byte array that is using Win437 encoding and convert it to a UTF8 encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun. Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the UTF8 stream as is.

Parameters

pOutput	Pointer to UTF8 buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pInput	Win437 encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ GetGenericSize() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetGenericSize	(	const uint8_t	pTranslateTable[128][4],
		const char *	pInput )

staticnoexcept

Determine the size of the UTF8 stream for a generic "C" string.

Take a "C" string, encoded with the character set described by the translation table, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	Pointer to a "C" string encoded with the translation table's character set

Returns: The number of bytes the string would require if converted.

See also: GetGenericSize(const uint8_t [128][4], uint_t uInput) or GetGenericSize( const uint8_t [128][4], const char *, uintptr_t)

◆ GetGenericSize() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetGenericSize	(	const uint8_t	pTranslateTable[128][4],
		const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for a generic buffer.

Take a buffer, encoded with the character set described by the translation table, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	Pointer to a buffer encoded with the translation table's character set
uInputSize	Number of bytes in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetGenericSize(const uint8_t [128][4], uint_t uInput) or GetGenericSize( const uint8_t [128][4], const char *)

◆ GetGenericSize() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetGenericSize	(	const uint8_t	pTranslateTable[128][4],
		uint_t	uInput )

staticnoexcept

Determine the size of the UTF8 stream from a char.

Take the unsigned 8 bit value of a character encoded with the supplied lookup table and return the number of UTF8 bytes it will occupy. The answer is either 1, 2, 3 or 4 bytes. If the input is greater than 0xFF, the returned value is zero.

Parameters

pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
uInput	Table encoded 8 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2, 3, 4 or 0 if uInput is >=256.

See also: GetGenericSize(const uint8_t [128][4], const char*) or GetGenericSize( const uint8_t [128][4], const char*, uintptr_t)

◆ GetISOLatin1Size() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetISOLatin1Size ( const char * pInput )

staticnoexcept

Determine the size of the UTF8 stream for an ISOLatin1 "C" string.

Take a "C" string, encoded with ISOLatin1, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pInput Pointer to a "C" string encoded in ISOLatin1

Returns: The number of bytes the string would require if converted.

See also: GetISOLatin1Size(uint_t uInput) or GetISOLatin1Size( const char *, uintptr_t)

◆ GetISOLatin1Size() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetISOLatin1Size	(	const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for an ISOLatin1 buffer.

Take a buffer, encoded with ISOLatin1, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pInput	Pointer to a buffer encoded in ISOLatin1
uInputSize	Number of bytes in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetISOLatin1Size(uint_t uInput) or GetISOLatin1Size( const char *)

◆ GetISOLatin1Size() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetISOLatin1Size ( uint_t uInput )

staticnoexcept

Determine the size of the UTF8 stream for an ISOLatin1 char.

Take the unsigned 8 bit value of the ISOLatin1 character and return the number of UTF8 bytes it will occupy. The answer is either 1 or 2 bytes. If the input is greater than 0xFF, the returned value is zero.

Parameters

uInput ISOLatin1 encoded 8 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2 or 0 if uInput is >=256.

See also: GetISOLatin1Size(const char*) or GetISOLatin1Size( const char*, uintptr_t)

◆ GetMacRomanUSSize() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetMacRomanUSSize ( const char * pInput )

staticnoexcept

Determine the size of the UTF8 stream for a MacRomanUS "C" string.

Take a "C" string, encoded with MacRomanUS, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pInput Pointer to a "C" string encoded in MacRomanUS

Returns: The number of bytes the string would require if converted.

See also: GetMacRomanUSSize(uint_t uInput) or GetMacRomanUSSize( const char *, uintptr_t)

◆ GetMacRomanUSSize() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetMacRomanUSSize	(	const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for a MacRomanUS buffer.

Take a buffer, encoded with MacRomanUS, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pInput	Pointer to a buffer encoded in MacRomanUS
uInputSize	Number of bytes in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetMacRomanUSSize(uint_t uInput) or GetMacRomanUSSize( const char *)

◆ GetMacRomanUSSize() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetMacRomanUSSize ( uint_t uInput )

staticnoexcept

Determine the size of the UTF8 stream for a MacRomanUS char.

Take the unsigned 8 bit value of the MacRomanUS character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.

Parameters

uInput MacRomanUS encoded 8 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2, 3 or 0 if uInput is >=256.

See also: GetMacRomanUSSize(const char*) or GetMacRomanUSSize( const char*, uintptr_t)

◆ GetTokenSize()

uintptr_t BURGER_API Burger::UTF8::GetTokenSize ( const char * pInput )

staticnoexcept

Return the number of bytes a UTF8 stream occupies.

Check the UTF8 stream and determine if it's 1-4 bytes in length. No invalid data checking is performed; use Burger::UTF8::IsValidSingle(const char *) if the data needs to be validated.

Parameters

pInput Pointer to UTF8 data, nullptr will page fault.

Returns: 1-4 for the token size.

◆ GetUTF16Size() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetUTF16Size ( const uint16_t * pInput )

staticnoexcept

Determine the size of the UTF8 stream for a UTF16 "C" string.

Take a "C" string, encoded with UTF16, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pInput Pointer to a "C" string encoded in UTF16

Returns: The number of bytes the string would require if converted.

See also: GetUTF16Size(uint_t uInput) or GetUTF16Size( const uint16_t*, uintptr_t)

◆ GetUTF16Size() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetUTF16Size	(	const uint16_t *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for a UTF16 buffer.

Take a buffer, encoded with UTF16, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pInput	Pointer to a buffer encoded in UTF16
uInputSize	Number of elements in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetUTF16Size(uint_t uInput) or GetUTF16Size( const uint16_t *)

◆ GetUTF16Size() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetUTF16Size ( uint_t uInput )

staticnoexcept

Determine the size of the UTF8 stream for a UTF16 value.

Take the unsigned 16 bit value of the UTF16 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFFFF, the returned value is zero.

Parameters

uInput UTF16 encoded 16 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2, 3 or 0 if uInput is >=0x10000.

See also: GetUTF16Size(const uint16_t*) or GetUTF16Size( const uint16_t*, uintptr_t)

◆ GetWin1252Size() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetWin1252Size ( const char * pInput )

staticnoexcept

Determine the size of the UTF8 stream for a Win1252 "C" string.

Take a "C" string, encoded with Win1252, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pInput Pointer to a "C" string encoded in Win1252

Returns: The number of bytes the string would require if converted.

See also: GetWin1252Size(uint_t uInput) or GetWin1252Size( const char *, uintptr_t)

◆ GetWin1252Size() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetWin1252Size	(	const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for a Win1252 buffer.

Take a buffer, encoded with Win1252, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pInput	Pointer to a buffer encoded in Win1252
uInputSize	Number of bytes in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetWin1252Size(uint_t uInput) or GetWin1252Size( const char *)

◆ GetWin1252Size() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetWin1252Size ( uint_t uInput )

staticnoexcept

Determine the size of the UTF8 stream for a Win1252 char.

Take the unsigned 8 bit value of the Win1252 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.

Parameters

uInput Win1252 encoded 8 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2, 3 or 0 if uInput is >=256.

See also: GetWin1252Size(const char*) or GetWin1252Size( const char*, uintptr_t)

◆ GetWin437Size() [1/3]

uintptr_t BURGER_API Burger::UTF8::GetWin437Size ( const char * pInput )

staticnoexcept

Determine the size of the UTF8 stream for a Win437 "C" string.

Take a "C" string, encoded with Win437, and determine the length in bytes this string would require if encoded in UTF8.

Parameters

pInput Pointer to a "C" string encoded in Win437

Returns: The number of bytes the string would require if converted.

See also: GetWin437Size(uint_t uInput) or GetWin437Size( const char *, uintptr_t)

◆ GetWin437Size() [2/3]

uintptr_t BURGER_API Burger::UTF8::GetWin437Size	(	const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Determine the size of the UTF8 stream for a Win437 buffer.

Take a buffer, encoded with Win437, and determine the length in bytes this buffer would require if encoded in UTF8.

Parameters

pInput	Pointer to a buffer encoded in Win437
uInputSize	Number of bytes in the buffer

Returns: The number of bytes the buffer would require if converted.

See also: GetWin437Size(uint_t uInput) or GetWin437Size( const char *)

◆ GetWin437Size() [3/3]

uintptr_t BURGER_API Burger::UTF8::GetWin437Size ( uint_t uInput )

staticnoexcept

Determine the size of the UTF8 stream for a Win437 char.

Take the unsigned 8 bit value of the Win437 character and return the number of UTF8 bytes it will occupy. The answer is either 1, 2 or 3 bytes. If the input is greater than 0xFF, the returned value is zero.

Parameters

uInput Win437 encoded 8 bit character

Returns: The number of bytes needed to UTF8 encode. 1, 2, 3 or 0 if uInput is >=256.

See also: GetWin437Size(const char*) or GetWin437Size( const char*, uintptr_t)

◆ IsValid() [1/2]

uint_t BURGER_API Burger::UTF8::IsValid ( const char * pInput )

staticnoexcept

Check a UTF8 "C" string for validity.

Check a "C" string to see if it's a valid UTF8 stream. Return FALSE if there was an error, or TRUE if the bytes represent a valid UTF8 pattern.

Parameters

pInput Pointer to a zero terminated string, nullptr will page fault.

Returns: TRUE if the entire string is a valid UTF8 stream, FALSE if not.

◆ IsValid() [2/2]

uint_t BURGER_API Burger::UTF8::IsValid	(	const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Check a UTF8 byte array for validity.

Check a byte array and see if it's a valid UTF8 stream. Return FALSE if there was an error, or TRUE if the bytes represent a valid UTF8 pattern.

Parameters

pInput	Pointer to UTF8 data. Can be `nullptr` if uInputSize is zero, otherwise page fault.
uInputSize	Length of the data in bytes, if zero, then the function will return TRUE.

Returns: TRUE if the entire string is a valid UTF8 stream, FALSE if not.

◆ IsValidSingle()

uint_t BURGER_API Burger::UTF8::IsValidSingle ( const char * pInput )

staticnoexcept

Check a single UTF8 byte pattern for validity.

Check the next 1 to 4 bytes to see if they comprise a valid UTF8 byte pattern and return true if they do, false if not.

Since UTF8 streams are variable length, there is no function that can take a singular value and check it for validity, you must use this function for single cases or Burger::UTF8::IsValid(const char *) for multi-character streams.

Note: This function is called Burger::UTF8::IsValidSingle(const char *) because of a name conflict with Burger::UTF8::IsValid(const char *).

Parameters

pInput Pointer to a stream of 1 to 4 UTF8 encoded bytes, nullptr will page fault.

Returns: true if the next 1-4 bytes are a valid UTF8 stream, false if not.

◆ NextToken()

const char *BURGER_API Burger::UTF8::NextToken ( const char * pInput )

staticnoexcept

Return the pointer to the next UTF8 token.

Check the UTF8 stream and determine if it's 1-4 bytes in length, then return the supplied pointer incremented by that length. No invalid data checking is performed; use Burger::UTF8::IsValidSingle(const char *) if the data needs to be validated.

Parameters

pInput Pointer to UTF8 data, nullptr will page fault.

Returns: pInput + the number of bytes the current token occupies.

◆ ToGeneric() [1/3]

uintptr_t BURGER_API Burger::UTF8::ToGeneric	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint8_t	pTranslateTable[128][4],
		const char *	pInput )

staticnoexcept

Convert a UTF8 stream into a generic "C" string.

Take a "C" string that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note: This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Parameters

pOutput	Pointer to byte buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise it will page fault.
uOutputSize	Size of the output buffer in bytes.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	A UTF8 encoded "C" string, `nullptr` will page fault.

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ ToGeneric() [2/3]

uintptr_t BURGER_API Burger::UTF8::ToGeneric	(	char *	pOutput,
		uintptr_t	uOutputSize,
		const uint8_t	pTranslateTable[128][4],
		const char *	pInput,
		uintptr_t	uInputSize )

staticnoexcept

Convert a UTF8 stream into a generic byte array.

Take a byte array that is using UTF8 encoding and convert it to a generic encoded "C" string. The function will return the size of the string after encoding. This size is valid, even if it exceeded the output buffer size. The output pointer and size can be nullptr to have this routine calculate the size of the possible output so the application can allocate a buffer large enough to hold it.

Note

This function will ensure that the string is always zero terminated, even if truncation is necessary to get it to fit in the output buffer. Under no circumstances will the output buffer be overrun.

Zeros can be encoded into the stream. This function will not early out if a zero was parsed. Zeros will be placed in the generic stream as is.

Parameters

pOutput	Pointer to a byte buffer to receive the converted string, `nullptr` is okay if uOutputSize is zero, otherwise a page fault will occur.
uOutputSize	Size of the output buffer in bytes.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.
pInput	UTF8 encoded byte array, `nullptr` is okay if uInputSize is zero.
uInputSize	Size of the input byte array

Returns: Byte count of the potential output. It is valid, even if the output buffer wasn't large enough to contain everything.

◆ ToGeneric() [3/3]

uint_t BURGER_API Burger::UTF8::ToGeneric	(	const char *	pInput,
		const uint8_t	pTranslateTable[128][4] )

staticnoexcept

Convert a UTF8 stream into a generic 8 bit char.

Take a 1 to 4 byte UTF8 stream and look up the unsigned 8 bit value of the generic character. Codes 0 through 0x7f are considered ASCII while codes 0x80 through 0xFF will be found in the supplied table.

Note: This function does a linear search of the table; as a result, it's not very fast on strings with a lot of high ASCII characters.

Parameters

pInput	Pointer to UTF8 buffer that contains the valid stream to convert, `nullptr` will page fault.
pTranslateTable	Pointer to a 128x4 array to use as a UTF8 conversion table.

Returns: The unsigned 8 bit character code (0x00-0xFF) or Burger::UTF8::kInvalid if the UTF8 value wasn't low ASCII and couldn't be found in the table.

Member Data Documentation

◆ ByteOrderMark

const uint8_t Burger::UTF8::ByteOrderMark[3] = {0xEF, 0xBB, 0xBF}

static

UTF8 text file signature.

If a raw text file starts with this three byte pattern, you're supposed to assume that all of the text that follows is encoded with UTF8.

Note: An explanation of the byte order mark can be found at Unicode.org.

◆ kInvalid

Burger::UTF8::kInvalid = UINT32_MAX

static

Value returned if a routine failed.

If a function doesn't return TRUE or FALSE for failure, it will return this value instead. Please see the documentation for each function to know which ones use true/false pairs or this value.

◆ TokenSizeTable

const uint8_t Burger::UTF8::TokenSizeTable[256]

static

Table to determine the size of a UTF8 token stream.

Using the first byte as an index, obtain the size of the stream in bytes from this table. Entries are the numbers 1-4. This table shouldn't be used for error checking and it's only for quick look ups on valid UTF8 streams.

Static Public Member Functions

Static Public Attributes

Detailed Description

Member Function Documentation

◆ FromGeneric() [1/3]

◆ FromGeneric() [2/3]

◆ FromGeneric() [3/3]

◆ FromISOLatin1() [1/3]

◆ FromISOLatin1() [2/3]

◆ FromISOLatin1() [3/3]

◆ FromMacRomanUS() [1/3]

◆ FromMacRomanUS() [2/3]

◆ FromMacRomanUS() [3/3]

◆ FromUTF16() [1/4]

◆ FromUTF16() [2/4]

◆ FromUTF16() [3/4]

◆ FromUTF16() [4/4]

◆ FromUTF32() [1/4]

◆ FromUTF32() [2/4]

◆ FromUTF32() [3/4]

◆ FromUTF32() [4/4]

◆ FromWin1252() [1/3]

◆ FromWin1252() [2/3]

◆ FromWin1252() [3/3]

◆ FromWin437() [1/3]

◆ FromWin437() [2/3]

◆ FromWin437() [3/3]

◆ GetGenericSize() [1/3]

◆ GetGenericSize() [2/3]

◆ GetGenericSize() [3/3]

◆ GetISOLatin1Size() [1/3]

◆ GetISOLatin1Size() [2/3]

◆ GetISOLatin1Size() [3/3]

◆ GetMacRomanUSSize() [1/3]

◆ GetMacRomanUSSize() [2/3]

◆ GetMacRomanUSSize() [3/3]

◆ GetTokenSize()

◆ GetUTF16Size() [1/3]

◆ GetUTF16Size() [2/3]

◆ GetUTF16Size() [3/3]

◆ GetWin1252Size() [1/3]

◆ GetWin1252Size() [2/3]

◆ GetWin1252Size() [3/3]

◆ GetWin437Size() [1/3]

◆ GetWin437Size() [2/3]

◆ GetWin437Size() [3/3]

◆ IsValid() [1/2]

◆ IsValid() [2/2]

◆ IsValidSingle()

◆ NextToken()

◆ ToGeneric() [1/3]

◆ ToGeneric() [2/3]

◆ ToGeneric() [3/3]

Member Data Documentation

◆ ByteOrderMark

◆ kInvalid

◆ TokenSizeTable