/* ******************************************************************
 * hist : Histogram functions
 * part of Finite State Entropy project
 * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
 *
 *  You can contact the author at :
 *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
 *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
****************************************************************** */

/* --- dependencies --- */
#include "../common/mem.h"             /* U32, BYTE, etc. */
#include "../common/debug.h"           /* assert, DEBUGLOG */
#include "../common/error_private.h"   /* ERROR */
#include "hist.h"


/* --- Error management --- */

/* HIST_isError() :
 * @return : non-zero when `code` is an error code, as decided by ERR_isError(). */
unsigned HIST_isError(size_t code) { return ERR_isError(code); }


/*-**************************************************************
 *  Histogram functions
 ****************************************************************/

/* HIST_count_simple() :
 * Straightforward byte histogram : one counter per symbol, one pass over `src`.
 * `count` must provide at least (*maxSymbolValuePtr + 1) cells; they are all reset first.
 * Every byte of `src` is expected to be <= *maxSymbolValuePtr (only checked by assert()).
 * On return, *maxSymbolValuePtr is lowered to the largest symbol actually present
 * (0 when `srcSize` is 0).
 * @return : count of the most frequent symbol (0 when `srcSize` is 0).
 * NOTE(review) : the text between `while (ip` and `largestCount)` was missing from the
 * reviewed copy of this file (looks like `<...>` stripping); the counting loop, the
 * trimming loop and the max-search loop below were reconstructed - confirm against VCS. */
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
                           const void* src, size_t srcSize)
{
    const BYTE* ip = (const BYTE*)src;
    const BYTE* const end = ip + srcSize;
    unsigned maxSymbolValue = *maxSymbolValuePtr;
    unsigned largestCount=0;

    memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }

    while (ip<end) {
        assert(*ip <= maxSymbolValue);
        count[*ip++]++;
    }

    /* srcSize > 0 and all symbols <= maxSymbolValue : at least one cell is non-zero,
     * so this loop stops before maxSymbolValue could go below 0 */
    while (!count[maxSymbolValue]) maxSymbolValue--;
    *maxSymbolValuePtr = maxSymbolValue;

    {   U32 s;
        for (s=0; s<=maxSymbolValue; s++)
            if (count[s] > largestCount) largestCount = count[s];
    }

    return largestCount;
}

typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;

/* HIST_count_parallel_wksp() :
 * store histogram into 4 intermediate tables, recombined at the end.
 * this design makes better use of OoO cpus,
 * and is noticeably faster when some values are heavily repeated.
 * But it needs some additional workspace for intermediate tables.
 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32
 * (4 tables of 256 U32 are carved out of it and zeroed here).
 * `check` : with checkMaxSymbolValue, any symbol > *maxSymbolValuePtr triggers an error;
 *           with trustInput, such symbols are silently left out of `count`.
 * *maxSymbolValuePtr == 0 on entry means "default", and is treated as 255.
 * On return, *maxSymbolValuePtr is lowered to the largest symbol with a non-zero count.
 * @return : largest histogram frequency,
 *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr).
 * NOTE(review) : the text between `while (ip` and `maxSymbolValue; s--)` was missing from
 * the reviewed copy of this file; the tail loop and the `if (check)` header below were
 * reconstructed - confirm against VCS. */
static size_t HIST_count_parallel_wksp(
                                unsigned* count, unsigned* maxSymbolValuePtr,
                                const void* source, size_t sourceSize,
                                HIST_checkInput_e check,
                                U32* const workSpace)
{
    const BYTE* ip = (const BYTE*)source;
    const BYTE* const iend = ip+sourceSize;
    unsigned maxSymbolValue = *maxSymbolValuePtr;
    unsigned max=0;
    U32* const Counting1 = workSpace;
    U32* const Counting2 = Counting1 + 256;
    U32* const Counting3 = Counting2 + 256;
    U32* const Counting4 = Counting3 + 256;

    memset(workSpace, 0, 4*256*sizeof(unsigned));

    /* safety checks */
    if (!sourceSize) {
        /* size is expressed in bytes : clear whole cells, not just the first few bytes */
        memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
        *maxSymbolValuePtr = 0;
        return 0;
    }
    if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */

    /* by stripes of 16 bytes
     * The first 32-bit load needs at least 4 readable bytes; shorter inputs go
     * straight to the byte-by-byte tail below. The loop condition is written as a
     * distance so that no pointer before `source` is ever formed. */
    if (sourceSize >= 4) {
        U32 cached = MEM_read32(ip); ip += 4;
        while ((size_t)(iend - ip) > 15) {
            U32 c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[       c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[       c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[       c>>24 ]++;
            c = cached; cached = MEM_read32(ip); ip += 4;
            Counting1[(BYTE) c     ]++;
            Counting2[(BYTE)(c>>8) ]++;
            Counting3[(BYTE)(c>>16)]++;
            Counting4[       c>>24 ]++;
        }
        ip-=4;   /* the last `cached` word was loaded but not counted : rewind over it */
    }

    /* finish last symbols */
    while (ip<iend) Counting1[*ip++]++;

    if (check) {   /* verify stats will fit into destination table */
        U32 s; for (s=255; s>maxSymbolValue; s--) {
            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
    }   }

    {   U32 s;
        if (maxSymbolValue > 255) maxSymbolValue = 255;
        for (s=0; s<=maxSymbolValue; s++) {
            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
            if (count[s] > max) max = count[s];
    }   }

    /* trim trailing empty cells; stop at 0 so that a trustInput caller whose data
     * only contains symbols > maxSymbolValue cannot make the index wrap around */
    while (maxSymbolValue > 0 && !count[maxSymbolValue]) maxSymbolValue--;
    *maxSymbolValuePtr = maxSymbolValue;
    return (size_t)max;
}

/* HIST_countFast_wksp() :
 * Same as HIST_countFast(), but using an externally provided scratch buffer.
 * `workSpace` is a writable buffer which must be 4-bytes aligned,
 * `workSpaceSize` must be >= HIST_WKSP_SIZE.
 * Inputs shorter than 1500 bytes are handled by HIST_count_simple() and do not
 * touch `workSpace` (its alignment and size are not even checked in that case).
 * @return : largest histogram frequency, or an error code
 *           (GENERIC on misaligned workSpace, workSpace_tooSmall on short workSpace). */
size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
                           const void* source, size_t sourceSize,
                           void* workSpace, size_t workSpaceSize)
{
    if (sourceSize < 1500) /* heuristic threshold */
        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
}

/* HIST_countFast() :
 * fast variant (unsafe : won't check if src contains values beyond count[] limit).
 * Uses a scratch table of HIST_WKSP_SIZE_U32 unsigned allocated on the stack. */
size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
                      const void* source, size_t sourceSize)
{
    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
}

/* HIST_count_wksp() :
 * Same as HIST_count(), but using an externally provided scratch buffer.
 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned,
 * and must be aligned on a 4-bytes boundary.
 * When *maxSymbolValuePtr < 255, the input is verified against that limit;
 * otherwise the limit is clamped to 255 (every byte value fits) and the
 * unchecked fast path is used.
 * @return : largest histogram frequency, or an error code. */
size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
                       const void* source, size_t sourceSize,
                       void* workSpace, size_t workSpaceSize)
{
    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
    if (*maxSymbolValuePtr < 255)
        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
    *maxSymbolValuePtr = 255;
    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
}

/* HIST_count() :
 * Histogram of `src`, written into `count`, with input checked against *maxSymbolValuePtr.
 * Forwards to HIST_count_wksp() with a scratch table of HIST_WKSP_SIZE_U32 unsigned
 * allocated on the stack.
 * @return : largest histogram frequency, or an error code. */
size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
                  const void* src, size_t srcSize)
{
    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
    return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
}