Compress data using Deflate Encoding. More...
Classes | |
struct | CodeData_t |
Structure for each huffman tree entry. More... | |
struct | StaticTreeDesc_t |
Structure for each static huffman tree entry. More... | |
struct | TreeDesc_t |
Public Member Functions | |
const Burger::StaticRTTI * | get_StaticRTTI (void) const noexcept override |
Get the description to the class. | |
CompressDeflate (void) | |
Default constructor. | |
eError | Init (void) override |
Reset the RLE compressor. | |
eError | Process (const void *pInput, uintptr_t uInputLength) override |
Compress the input data using RLE. | |
eError | Finalize (void) override |
Finalize RLE compression. | |
Public Member Functions inherited from Burger::Compress | |
Compress (void) | |
Default constructor. | |
virtual | ~Compress () |
Default destructor. | |
OutputMemoryStream * | GetOutput (void) noexcept |
Get the output data. | |
uintptr_t | GetOutputSize (void) const noexcept |
Get the output data size in bytes. | |
uint32_t | GetSignature (void) const noexcept |
Return the signature for this compressor. | |
Public Member Functions inherited from Burger::Base | |
const char * | get_class_name (void) const noexcept |
Get the name of the class. | |
virtual | ~Base () noexcept=default |
Destructor. | |
Static Public Attributes | |
static const Burger::StaticRTTI | g_StaticRTTI |
The global description of the class. | |
static const uint32_t | Signature = 0x5A4C4942 |
'ZLIB' | |
Static Public Attributes inherited from Burger::Compress | |
static const Burger::StaticRTTI | g_StaticRTTI |
The global description of the class. | |
Static Public Attributes inherited from Burger::Base | |
static const Burger::StaticRTTI | g_StaticRTTI |
The global description of the class. | |
Protected Types | |
enum | eState { DEFAULT_STATE =0 , INIT_STATE =42 , BUSY_STATE =113 , FINISH_STATE =666 } |
enum | eBlockState { STATE_NEEDMORE , STATE_BLOCKDONE , STATE_FINISHSTARTED , STATE_FINISHDONE } |
enum | eDataType { Z_BINARY =0 , Z_ASCII =1 , Z_UNKNOWN =2 } |
enum | eStreamTreeType { STORED_BLOCK =0 , STATIC_TREES =1 , DYN_TREES =2 } |
enum | { MAX_WBITS =15 , MAX_MEM_LEVEL =9 , Z_BEST_COMPRESSION =9 , Z_DEFLATED =8 , SMALLEST =1 , MIN_MATCH =3 , MAX_MATCH =258 , DIST_CODE_LEN =512 , TOO_FAR =4096 , MIN_LOOKAHEAD =(MAX_MATCH+MIN_MATCH+1) , PRESET_DICT =0x20 , LENGTH_CODES =29 , LITERALS =256 , L_CODES =(LITERALS+1+LENGTH_CODES) , D_CODES =30 , BL_CODES =19 , HEAP_SIZE =(2*L_CODES+1) , MAX_BITS =15 , MAX_BL_BITS =7 , END_BLOCK =256 , REP_3_6 =16 , REPZ_3_10 =17 , REPZ_11_138 =18 } |
Protected Member Functions | |
uint_t | TallyLiteral (uint_t uInput) |
uint_t | TallyDistance (uint_t uDistance, uint_t uLength) |
void | SendBits (uint_t uInput, uint_t uLength) |
void | send_code (uint_t uCode, const CodeData_t *pTree) |
void | ClearHash (void) |
uint_t | InsertString (uint_t uStringIndex) |
void | FlushBlock (uint_t bEOF) |
void | OutputBigEndian16 (uint_t b) |
Insert a 16 bit value in the output stream in Big Endian order. | |
void | BitIndexFlush (void) |
Flush the bit buffer, keeping at most 7 bits in it. | |
void | BitIndexFlushToByte (void) |
Flush the bit buffer and align the output on a byte boundary. | |
void | CopyBlock (const uint8_t *pInput, uintptr_t uInputLength) |
Copy a stored block, storing first the length and its one's complement. | |
void | SetDataType (void) |
Determine if the data to compress is ASCII or BINARY. | |
void | InitBlock (void) |
Init a new deflate block. | |
void | StaticTreeInit (void) |
Initialize the tree data structures for a new zlib stream. | |
void | PQDownHeap (const CodeData_t *pTree, int k) |
Restore the heap. | |
void | CompressBlock (const CodeData_t *pLengthTree, const CodeData_t *pDistanceTree) |
Send the block data compressed using the given Huffman trees. | |
uintptr_t | ReadBuffer (uint8_t *pOutput, uintptr_t uOutputSize) |
Read a new buffer from the current input stream. | |
void | FillWindow (void) |
Fill the window when the lookahead becomes insufficient. | |
void | GenerateBitLengths (const TreeDesc_t *pTreeDescription) |
Compute the optimal bit lengths for a tree and update the total bit length for the current block. | |
void | GenerateCodes (CodeData_t *tree, int max_code, uint16_t *bl_count) |
void | BuildTree (TreeDesc_t *desc) |
void | ScanTree (CodeData_t *tree, int max_code) |
int | BuildBitLengthTree (void) |
void | StoredBlock (const uint8_t *buf, uint32_t stored_len, int eof) |
void | SendTree (CodeData_t *tree, int max_code) |
void | SendAllTrees (int lcodes, int dcodes, int blcodes) |
void | FlushBlock (const uint8_t *buf, uint32_t stored_len, uint_t bEOF) |
void | FlushPending (void) |
uint_t | LongestMatch (uint_t cur_match) |
eBlockState | DeflateSlow (int flush) |
void | Align (void) |
int | DeflateEnd (void) |
void | LongestMatchInit (void) |
int | DeflateReset (void) |
int | DeflateInit (void) |
int | PerformDeflate (int flush) |
Static Protected Member Functions | |
static uint_t | UpdateHash (uint_t uHash, uint8_t uInput) |
Protected Attributes | |
const uint8_t * | m_pInput |
Next input byte. | |
uint8_t * | m_pPendingOutput |
Next pending byte to output to the stream. | |
uintptr_t | m_uInputLength |
Number of bytes available at next_in. | |
intptr_t | m_iBlockStart |
Window position at the beginning of the current output block. Gets negative when the window is moved backwards. | |
uint32_t | m_uAdler |
Adler32 value of the uncompressed data. | |
uint32_t | m_uOptimalLength |
bit length of current block with optimal trees | |
uint32_t | m_uStaticLength |
bit length of current block with static trees | |
uint_t | m_uInsertHash |
hash index of string to be inserted | |
uint_t | m_uMatchLength |
Length of best match. | |
uint_t | m_uPreviousMatch |
previous match | |
uint_t | m_bMatchAvailable |
set if previous match exists | |
uint_t | m_uStringStart |
start of string to insert | |
uint_t | m_uMatchStart |
start of matching string | |
uint_t | m_uLookAhead |
number of valid bytes ahead in window | |
uint_t | m_uPreviousLength |
Length of the best match at previous step. Matches not greater than this are discarded. This is used in the lazy match evaluation. | |
uint_t | m_uLastLiteral |
running index in l_buf | |
uint_t | m_uMatches |
number of string matches in current block | |
uint_t | m_uBitIndexBuffer |
Number of valid bits in bi_buf. All bits above the last valid bit are always zero. | |
uint_t | m_uBitIndexValid |
Number of bits in the output buffer. | |
uint_t | m_bInitialized |
TRUE if initialized | |
uint_t | m_uLastEOBLength |
bit length of EOB code for last block | |
int | m_iPending |
Number of bytes in the pending buffer. | |
int | m_bNoHeader |
Suppress zlib header and adler32. | |
int | m_iLastFlush |
Value of flush param for previous deflate call. | |
int | m_iHeapLength |
number of elements in the heap | |
int | m_iHeapMaximum |
element of largest frequency | |
eState | m_eState |
As the name implies. | |
eDataType | m_eDataType |
UNKNOWN, BINARY or ASCII. | |
uint8_t | m_bMethod |
STORED (for zip only) or DEFLATED. | |
CodeData_t | m_DynamicLengthTrees [HEAP_SIZE] |
literal and length tree | |
CodeData_t | m_DynamicDistanceTrees [2 *D_CODES+1] |
distance tree | |
CodeData_t | m_BitLengthTrees [2 *BL_CODES+1] |
Huffman tree for bit lengths. | |
TreeDesc_t | m_LiteralDescription |
desc. for literal tree | |
TreeDesc_t | m_DistanceDescription |
desc. for distance tree | |
TreeDesc_t | m_BitLengthDescription |
desc. for bit length tree | |
int | m_Heap [2 *L_CODES+1] |
heap used to build the Huffman trees | |
uint16_t | m_Head [c_uHashSize] |
Heads of the hash chains or 0. | |
uint16_t | m_Previous [c_uWSize] |
Link to older string with same hash index. To limit the size of this array to 64K, this link is maintained only for the last 32K strings. An index in this array is thus a window index modulo 32K. | |
uint16_t | m_BitLengthCount [MAX_BITS+1] |
MAX_BITS = 15, so this is long aligned. | |
uint16_t | m_DataBuffer [c_uLiteralBufferSize] |
Buffer for distances. To simplify the code, d_buf and l_buf have the same number of elements. To use different lengths, an extra flag array would be necessary. | |
uint8_t | m_LiteralBuffer [c_uLiteralBufferSize] |
buffer for literals or lengths | |
uint8_t | m_PendingBuffer [c_uLiteralBufferSize] |
Output still pending. | |
uint8_t | m_Depth [2 *L_CODES+1] |
Depth of each subtree used as tie breaker for trees of equal frequency. | |
uint8_t | m_Window [c_uWSize *2] |
Sliding window. Input bytes are read into the second half of the window, and move to the first half later to keep a dictionary of at least wSize bytes. With this organization, matches are limited to a distance of wSize-MAX_MATCH bytes, but this ensures that IO is always performed with a length multiple of the block size. | |
Protected Attributes inherited from Burger::Compress | |
OutputMemoryStream | m_Output |
Main output buffer for compressed data. | |
uint32_t | m_uSignature |
4 character code to identify this compressor | |
Static Protected Attributes | |
static const uint_t | c_uBufSize = static_cast<uint_t>(8 * 2*sizeof(uint8_t)) |
Number of bits used within bi_buf. (bi_buf might be implemented on more than 16 bits on some systems.) | |
static const uint_t | c_uWBits = MAX_WBITS |
log2(c_uWSize) (8..16) | |
static const uint_t | c_uWSize = 1 << c_uWBits |
LZ77 window size (32K by default) | |
static const uint_t | c_uWMask = c_uWSize - 1 |
c_uWSize - 1 Use a faster search when the previous match is longer than this | |
static const uint_t | c_uHashBits = MAX_MEM_LEVEL + 7 |
log2(hash_size) | |
static const uint_t | c_uHashSize = 1 << c_uHashBits |
number of elements in hash table | |
static const uint_t | c_uHashMask = c_uHashSize - 1 |
hash_size-1 | |
static const uint_t | c_uHashShift = ((c_uHashBits+MIN_MATCH-1)/MIN_MATCH) |
Number of bits by which m_uInsertHash must be shifted at each input step. It must be such that after MIN_MATCH steps, the oldest byte no longer takes part in the hash key, that is: hash_shift * MIN_MATCH >= hash_bits. | |
static const uint_t | c_uLiteralBufferSize = 1 << (MAX_MEM_LEVEL + 6) |
16K elements by default | |
static const uint_t | c_uWindowSize = 2*c_uWSize |
Actual size of window: 2*wSize, except when the user input buffer is directly used as sliding window. | |
static const uint_t | c_uMaxLazyMatch = 258 |
Attempt to find a better match only when the current match is strictly smaller than this value. This mechanism is used only for compression levels >= 4. | |
static const uint_t | c_uGoodMatch = 32 |
static const int | c_iNiceMatch = 258 |
Stop searching when current match exceeds this. | |
static const uint_t | c_uMaxChainLength = 4096 |
To speed up deflation, hash chains are never searched beyond this length. A higher limit improves compression ratio but degrades the speed. | |
static const int | g_ExtraLengthBits [LENGTH_CODES] |
static const int | g_ExtraDistanceBits [D_CODES] |
static const int | g_ExtraBitLengthBits [BL_CODES] |
static const uint8_t | g_BitLengthOrder [BL_CODES] |
static const CodeData_t | g_StaticLengthTrees [L_CODES+2] |
static const CodeData_t | g_StaticDistanceTrees [D_CODES] |
static const int | g_BaseLengths [LENGTH_CODES] |
static const int | g_BaseDistances [D_CODES] |
static const StaticTreeDesc_t | g_StaticLengthDescription |
static const StaticTreeDesc_t | g_StaticDistanceDescription |
static const StaticTreeDesc_t | g_StaticBitLengthDescription |
static const uint8_t | g_DistanceCodes [DIST_CODE_LEN] |
static const uint8_t | g_LengthCodes [MAX_MATCH-MIN_MATCH+1] |
Compress data using Deflate Encoding.
This format is the one used by ZLIB.
Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". Available in http://www.ietf.org/rfc/rfc1951.txt
A description of the Rabin and Karp algorithm is given in the book "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
Fiala,E.R., and Greene,D.H. Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
Burger::CompressDeflate::CompressDeflate | ( | void | ) |
Default constructor.
Initializes the cache buffer.
|
protected |
|
protected |
Flush the bit buffer, keeping at most 7 bits in it.
Check the output bit bucket and flush up to 16 bits into the byte stream
|
protected |
Flush the bit buffer and align the output on a byte boundary.
|
protected |
|
protected |
|
inlineprotected |
|
protected |
Send the block data compressed using the given Huffman trees.
|
protected |
Copy a stored block, storing first the length and its one's complement.
pInput | Pointer to the data block |
uInputLength | Length of the data block |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
Fill the window when the lookahead becomes insufficient.
Updates m_uStringStart and m_uLookAhead.
IN assertion: lookahead < MIN_LOOKAHEAD OUT assertions: m_uStringStart <= window_size-MIN_LOOKAHEAD At least one byte has been read, or avail_in == 0; reads are performed for at least two bytes (required for the zip translate_eol option – not supported here).
|
overridevirtual |
Finalize RLE compression.
If any data has been cached from the compression stream, flush it into the output
Implements Burger::Compress.
|
protected |
|
inlineprotected |
|
protected |
|
protected |
Compute the optimal bit lengths for a tree and update the total bit length for the current block.
IN assertion: the fields freq and dad are set, heap[heap_max] and above are the tree nodes sorted by increasing frequency. OUT assertions: the field len is set to the optimal bit length, the array bl_count contains the frequencies for each bit length. The length opt_len is updated; static_len is also updated if stree is not null.
|
protected |
|
overridevirtualnoexcept |
Get the description to the class.
This virtual function will pull the pointer to the StaticRTTI instance that has the name of the class. Due to it being virtual, it will be the name of the most derived class.
Reimplemented from Burger::Compress.
|
overridevirtual |
|
protected |
Init a new deflate block.
|
protected |
|
protected |
Insert a 16 bit value in the output stream in Big Endian order.
uInput | 16 bit value to store in the byte stream |
|
protected |
|
protected |
Restore the heap.
Restore the heap property by moving down the tree starting at node k, exchanging a node with the smallest of its two sons if necessary, stopping when the heap property is re-established (each father smaller than its two sons).
pTree | Pointer to the tree array to test |
k | Heap index (Tested against m_iHeapLength) |
|
overridevirtual |
Compress the input data using RLE.
Compresses the data using RLE and stores the compressed data into an OutputMemoryStream
Implements Burger::Compress.
|
protected |
Read a new buffer from the current input stream.
Update the adler32 and total number of bytes read.
|
protected |
|
inlineprotected |
|
protected |
|
protected |
|
protected |
Determine if the data to compress is ASCII or BINARY.
Set the data type to ASCII or BINARY, using a crude approximation: binary if more than 20% of the bytes are <= 6 or >= 128, ASCII otherwise.
Sets the variable m_eDataType to Z_ASCII or Z_BINARY as output
|
protected |
Initialize the tree data structures for a new zlib stream.
|
protected |
|
inlinestaticprotected |
|
staticprotected |
Stop searching when current match exceeds this.
|
staticprotected |
Number of bits used within bi_buf. (bi_buf might be implemented on more than 16 bits on some systems.)
|
staticprotected |
|
staticprotected |
log2(hash_size)
|
staticprotected |
hash_size-1
|
staticprotected |
Number of bits by which m_uInsertHash must be shifted at each input step. It must be such that after MIN_MATCH steps, the oldest byte no longer takes part in the hash key, that is: hash_shift * MIN_MATCH >= hash_bits.
|
staticprotected |
number of elements in hash table
|
staticprotected |
16K elements by default
|
staticprotected |
To speed up deflation, hash chains are never searched beyond this length. A higher limit improves compression ratio but degrades the speed.
|
staticprotected |
Attempt to find a better match only when the current match is strictly smaller than this value. This mechanism is used only for compression levels >= 4.
Actual size of window: 2*wSize, except when the user input buffer is directly used as sliding window.
c_uWSize - 1 Use a faster search when the previous match is longer than this
LZ77 window size (32K by default)
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
staticprotected |
|
static |
The global description of the class.
This record contains the name of this class and a reference to the parent
|
protected |
MAX_BITS = 15, so this is long aligned.
|
protected |
desc. for bit length tree
|
protected |
Huffman tree for bit lengths.
|
protected |
set if previous match exists
|
protected |
STORED (for zip only) or DEFLATED.
|
protected |
Suppress zlib header and adler32.
|
protected |
Buffer for distances. To simplify the code, d_buf and l_buf have the same number of elements. To use different lengths, an extra flag array would be necessary.
|
protected |
Depth of each subtree used as tie breaker for trees of equal frequency.
|
protected |
desc. for distance tree
|
protected |
distance tree
|
protected |
literal and length tree
|
protected |
UNKNOWN, BINARY or ASCII.
|
protected |
As the name implies.
|
protected |
Heads of the hash chains or 0.
|
protected |
heap used to build the Huffman trees
|
protected |
Window position at the beginning of the current output block. Gets negative when the window is moved backwards.
|
protected |
number of elements in the heap
|
protected |
element of largest frequency
|
protected |
Value of flush param for previous deflate call.
|
protected |
Number of bytes in the pending buffer.
|
protected |
buffer for literals or lengths
|
protected |
desc. for literal tree
|
protected |
Output still pending.
|
protected |
Next input byte.
|
protected |
Next pending byte to output to the stream.
|
protected |
Link to older string with same hash index. To limit the size of this array to 64K, this link is maintained only for the last 32K strings. An index in this array is thus a window index modulo 32K.
|
protected |
Adler32 value of the uncompressed data.
|
protected |
Number of valid bits in bi_buf. All bits above the last valid bit are always zero.
|
protected |
Number of bits in the output buffer.
|
protected |
Number of bytes available at next_in.
|
protected |
hash index of string to be inserted
|
protected |
bit length of EOB code for last block
|
protected |
running index in l_buf
|
protected |
number of valid bytes ahead in window
|
protected |
number of string matches in current block
|
protected |
Length of best match.
|
protected |
start of matching string
|
protected |
bit length of current block with optimal trees
|
protected |
Length of the best match at previous step. Matches not greater than this are discarded. This is used in the lazy match evaluation.
|
protected |
previous match
|
protected |
bit length of current block with static trees
|
protected |
start of string to insert
|
protected |
Sliding window. Input bytes are read into the second half of the window, and move to the first half later to keep a dictionary of at least wSize bytes. With this organization, matches are limited to a distance of wSize-MAX_MATCH bytes, but this ensures that IO is always performed with a length multiple of the block size.
|
static |
'ZLIB'