/* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #ifndef FSE_H #define FSE_H /*-***************************************** * Dependencies ******************************************/ #include /* size_t, ptrdiff_t */ namespace duckdb_zstd { /*-***************************************** * FSE_PUBLIC_API : control library symbols visibility ******************************************/ #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) # define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ # define FSE_PUBLIC_API __declspec(dllexport) #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else # define FSE_PUBLIC_API #endif /*------ Version ------*/ #define FSE_VERSION_MAJOR 0 #define FSE_VERSION_MINOR 9 #define FSE_VERSION_RELEASE 0 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE #define FSE_QUOTE(str) #str #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ /*-**************************************** * FSE simple functions ******************************************/ /*! FSE_compress() : Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). @return : size of compressed data (<= dstCapacity). Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. if FSE_isError(return), compression failed (more details using FSE_getErrorName()) */ FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize); /*! FSE_decompress(): Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', into already allocated destination buffer 'dst', of size 'dstCapacity'. @return : size of regenerated data (<= maxDstSize), or an error code, which can be tested using FSE_isError() . ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! Why ? : making this distinction requires a header. Header management is intentionally delegated to the user layer, which can better manage special cases. */ FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize); /*-***************************************** * Tool functions ******************************************/ FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ /* Error Management */ FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ /*-***************************************** * FSE advanced functions ******************************************/ /*! FSE_compress2() : Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' Both parameters can be defined as '0' to mean : use default value @return : size of compressed data Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. if FSE_isError(return), it's an error code. */ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); /*-***************************************** * FSE detailed API ******************************************/ /*! FSE_compress() does the following: 1. count symbol occurrence from source[] into table count[] (see hist.h) 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) 3. save normalized counters to memory buffer using writeNCount() 4. build encoding table 'CTable' from normalized counters 5. encode the data stream using encoding table 'CTable' FSE_decompress() does the following: 1. read normalized counters with readNCount() 2. build decoding table 'DTable' from normalized counters 3. decode the data stream using decoding table 'DTable' The following API allows targeting specific sub-functions for advanced tasks. For example, it's possible to compress several blocks using the same 'CTable', or to save and provide normalized distribution using external method. */ /* *** COMPRESSION *** */ /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. @return : recommended tableLog (necessarily <= 'maxTableLog') */ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. Typically useful for allocation purpose. */ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); /*! FSE_writeNCount(): Compactly save 'normalizedCounter' into 'buffer'. @return : size of the compressed table, or an errorCode, which can be tested using FSE_isError(). */ FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); /*! FSE_buildCTable(): Builds `ct`, which must be already allocated, using FSE_createCTable(). @return : 0, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_compress_usingCTable(): Compress `src` using `ct` into `dst` which must be already allocated. @return : size of compressed data (<= `dstCapacity`), or 0 if compressed data could not fit into `dst`, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); /*! Tutorial : ---------- The first step is to count all symbols. FSE_count() does this job very fast. Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) FSE_count() will return the number of occurrence of the most frequent symbol. This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). The next step is to normalize the frequencies. FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. It also guarantees a minimum of 1 to any Symbol with frequency >= 1. You can use 'tableLog'==0 to mean "use default tableLog value". If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). The result of FSE_normalizeCount() will be saved into a table, called 'normalizedCounter', which is a table of signed short. 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. The return value is tableLog if everything proceeded as expected. It is 0 if there is a single symbol within distribution. If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). 'buffer' must be already allocated. For guaranteed success, buffer size must be at least FSE_headerBound(). The result of the function is the number of bytes written into 'buffer'. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). 'normalizedCounter' can then be used to create the compression table 'CTable'. The space required by 'CTable' must be already allocated, using FSE_createCTable(). You can then use FSE_buildCTable() to fill 'CTable'. If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. If it returns '0', compressed data could not fit into 'dst'. If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). */ /* *** DECOMPRESSION *** */ /*! FSE_readNCount(): Read compactly saved 'normalizedCounter' from 'rBuffer'. @return : size read from 'rBuffer', or an errorCode, which can be tested using FSE_isError(). maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); /*! FSE_buildDTable(): Builds 'dt', which must be already allocated, using FSE_createDTable(). return : 0, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); /*! FSE_decompress_usingDTable(): Decompress compressed source `cSrc` of size `cSrcSize` using `dt` into `dst` which must be already allocated. @return : size of regenerated data (necessarily <= `dstCapacity`), or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); /*! Tutorial : ---------- (Note : these functions only decompress FSE-compressed blocks. If block is uncompressed, use memcpy() instead If block is a single repeated byte, use memset() instead ) The first step is to obtain the normalized frequencies of symbols. This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. In practice, that means it's necessary to know 'maxSymbolValue' beforehand, or size the table to handle worst case situations (typically 256). FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. If there is an error, the function will return an error code, which can be tested using FSE_isError(). The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. This is performed by the function FSE_buildDTable(). The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). If there is an error, the function will return an error code, which can be tested using FSE_isError(). `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). `cSrcSize` must be strictly correct, otherwise decompression will fail. FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) */ } #endif /* FSE_H */