// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/
#include "zstd_compress_internal.h"
#include "hist.h"
#include "zstd_opt.h"
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
#define ZSTD_MAX_PRICE (1<<30)
#define ZSTD_PREDEF_THRESHOLD 8 /* on a first block, if srcSize <= ZSTD_PREDEF_THRESHOLD, predefined stats are used (priceType = zop_predef) : symbols' cost is assumed static, directly determined by pre-defined distributions */
/*-*************************************
* Price functions for optimal parser
***************************************/
/* Price resolution :
 * BITCOST_ACCURACY is the number of fractional bits carried by a price,
 * and BITCOST_MULTIPLIER (== 1 << BITCOST_ACCURACY) is the value representing one full bit.
 * WEIGHT(stat, opt) converts a statistic into a weight.
 * Only the last (#else) configuration is active :
 * it selects ZSTD_fracWeight() when opt is non-zero, and ZSTD_bitWeight() otherwise.
 * The first two configurations are disabled (#if 0 / #elif 0) and only kept for tests;
 * they evaluate opt solely to discard it. */
#if 0 /* approximation at bit level (for tests) */
# define BITCOST_ACCURACY 0
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat))
#elif 0 /* fractional bit accuracy (for tests) */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat))
#else /* opt==approx, ultra==accurate */
# define BITCOST_ACCURACY 8
# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
# define WEIGHT(stat,opt) ((opt) ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
#endif
/* ZSTD_bitWeight() :
 * estimate the "cost" of a stat, counted in whole bits only.
 * @return : ZSTD_highbit32(stat+1), scaled by BITCOST_MULTIPLIER */
MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
{
    U32 const nbBits = ZSTD_highbit32(stat + 1);
    return nbBits * BITCOST_MULTIPLIER;
}
/* ZSTD_fracWeight() :
 * estimate the "cost" of a stat with fractional-bit resolution,
 * approximated through linear interpolation.
 * @return : whole-bit part (highbit scaled by BITCOST_MULTIPLIER)
 *           plus a fixed-point interpolation term */
MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
{
    U32 const shiftedStat = rawStat + 1;
    U32 const topBit = ZSTD_highbit32(shiftedStat);
    U32 const wholeBits = topBit * BITCOST_MULTIPLIER;
    /* despite standing for the "fractional" part,
     * this term is effectively a value between 1 and 2,
     * expressed in fixed point arithmetic */
    U32 const interpolated = (shiftedStat << BITCOST_ACCURACY) >> topBit;
    /* checked after the declarations, which must stay grouped at the top of the block */
    assert(topBit + BITCOST_ACCURACY < 31);
    return wholeBits + interpolated;
}
#if (DEBUGLEVEL>=2)
/* ZSTD_fCost() :
 * debug helper, intended for debug messages only.
 * @return : price converted into bytes, as a fractional value */
MEM_STATIC double ZSTD_fCost(int price)
{
    /* a price counts 1/BITCOST_MULTIPLIER of a bit; 8 bits per byte */
    double const priceUnitsPerByte = (double)(BITCOST_MULTIPLIER * 8);
    return (double)price / priceUnitsPerByte;
}
#endif
/* ZSTD_compressedLiterals() :
 * @return : 0 when literalCompressionMode is ZSTD_ps_disable, 1 for any other mode */
static int ZSTD_compressedLiterals(optState_t const* const optPtr)
{
    if (optPtr->literalCompressionMode == ZSTD_ps_disable) {
        return 0;
    }
    return 1;
}
/* ZSTD_setBasePrices() :
 * refresh the cached base prices from the current frequency sums.
 * Each base price is WEIGHT() of the corresponding sum, evaluated with @optLevel.
 * The literals base price is only updated when literals are compressed. */
static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
{
    int const literalsAreCompressed = ZSTD_compressedLiterals(optPtr);

    /* sequence symbols : always refreshed */
    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);

    /* literals : left untouched when literal compression is disabled */
    if (literalsAreCompressed) {
        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
    }
}
/* sum_u32() :
 * @return : sum of the @nbElts first elements of @table
 *           (accumulated in a U32, returns 0 when @nbElts is 0) */
static U32 sum_u32(const unsigned table[], size_t nbElts)
{
    U32 acc = 0;
    size_t idx = 0;
    while (idx < nbElts) {
        acc += table[idx];
        idx++;
    }
    return acc;
}
/* base_directive_e :
 * selects the base value added to each downscaled statistic in ZSTD_downscaleStats() :
 * - base_1guaranteed : base is always 1
 * - base_0possible   : base is 1 only when the original statistic was > 0, else 0 */
typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
/* ZSTD_downscaleStats() :
 * shrink, in place, the (lastEltIndex+1) first elements of @table :
 *     new value = base + (old value >> shift)
 * where base is always 1 when @base1 is set,
 * and otherwise 1 only for elements that were non-zero.
 * @return : sum of all updated elements */
static U32
ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
{
    U32 const nbElts = lastEltIndex + 1;
    U32 total = 0;
    U32 idx;
    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)",
                (unsigned)lastEltIndex+1, (unsigned)shift );
    assert(shift < 30);
    for (idx = 0; idx < nbElts; idx++) {
        unsigned const previous = table[idx];
        /* 1 when guaranteed by the directive, or when the element was already non-zero */
        unsigned const minStat = (base1 != base_0possible) || (previous > 0);
        unsigned const scaled = minStat + (previous >> shift);
        table[idx] = scaled;
        total += scaled;
    }
    return total;
}
/* ZSTD_scaleStats() :
 * reduce all element frequencies of @table when their sum is too large,
 * i.e. when (sum >> logTarget) is greater than 1.
 * In that case, elements are downscaled by ZSTD_highbit32(sum >> logTarget) bits,
 * and every element is guaranteed to end up >= 1.
 * @return : the resulting sum of elements (unchanged sum when no scaling is needed) */
static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
{
    U32 const currentSum = sum_u32(table, lastEltIndex+1);
    U32 const excessRatio = currentSum >> logTarget;
    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
    assert(logTarget < 30);
    if (excessRatio > 1) {
        U32 const shift = ZSTD_highbit32(excessRatio);
        return ZSTD_downscaleStats(table, lastEltIndex, shift, base_1guaranteed);
    }
    /* sum already within target : table left untouched */
    return currentSum;
}
/* ZSTD_rescaleFreqs() :
* if first block (detected by optPtr->litLengthSum == 0) : init statistics
* take hints from dictionary if there is one
* and init from zero if there is none,
* using src for literals stats, and baseline stats for sequence symbols
* otherwise downscale existing stats, to be used as seed for next block.
*/
static void
ZSTD_rescaleFreqs(optState_t* const optPtr,
const BYTE* const src, size_t const srcSize,
int const optLevel)
{
int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
optPtr->priceType = zop_dynamic;
if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */
/* heuristic: use pre-defined stats for too small inputs */
if (srcSize <= ZSTD_PREDEF_THRESHOLD) {
DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD);
optPtr->priceType = zop_predef;
}
assert(optPtr->symbolCosts != NULL);
if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
/* huffman stats covering the full value set : table presumed generated by dictionary
|