summaryrefslogtreecommitdiff
path: root/lib/zstd/common
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 21:35:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-26 21:35:28 -0700
commite61f33273ca755b3e2ebee4520a76097199dc7a8 (patch)
tree969db95c82c606fcd9926b3d7625c009951d08c9 /lib/zstd/common
parent592329e5e94e26080f4815c6cc6cd0f487a91064 (diff)
parent65d1f5507ed2c78c64fce40e44e5574a9419eb09 (diff)
downloadlinux-e61f33273ca755b3e2ebee4520a76097199dc7a8.tar.gz
linux-e61f33273ca755b3e2ebee4520a76097199dc7a8.tar.bz2
linux-e61f33273ca755b3e2ebee4520a76097199dc7a8.zip
Merge tag 'zstd-linus-v6.15-rc1' of https://github.com/terrelln/linux
Pull zstd updates from Nick Terrell: "Update zstd to the latest upstream release v1.5.7. The two major motivations for updating Zstandard are to keep the code up to date, and to expose API's needed by Intel for the QAT compression accelerator. Imported cleanly from the upstream tag v1.5.7-kernel, which is signed by upstream's signing key EF8FE99528B52FFD" Link: https://github.com/facebook/zstd/releases/tag/v1.5.7 Link: https://github.com/facebook/zstd/releases/tag/v1.5.7-kernel Link: https://keyserver.ubuntu.com/pks/lookup?search=EF8FE99528B52FFD&fingerprint=on&op=index * tag 'zstd-linus-v6.15-rc1' of https://github.com/terrelln/linux: zstd: Import upstream v1.5.7
Diffstat (limited to 'lib/zstd/common')
-rw-r--r--lib/zstd/common/allocations.h56
-rw-r--r--lib/zstd/common/bits.h150
-rw-r--r--lib/zstd/common/bitstream.h155
-rw-r--r--lib/zstd/common/compiler.h151
-rw-r--r--lib/zstd/common/cpu.h3
-rw-r--r--lib/zstd/common/debug.c9
-rw-r--r--lib/zstd/common/debug.h37
-rw-r--r--lib/zstd/common/entropy_common.c42
-rw-r--r--lib/zstd/common/error_private.c13
-rw-r--r--lib/zstd/common/error_private.h88
-rw-r--r--lib/zstd/common/fse.h103
-rw-r--r--lib/zstd/common/fse_decompress.c132
-rw-r--r--lib/zstd/common/huf.h240
-rw-r--r--lib/zstd/common/mem.h3
-rw-r--r--lib/zstd/common/portability_macros.h45
-rw-r--r--lib/zstd/common/zstd_common.c38
-rw-r--r--lib/zstd/common/zstd_deps.h16
-rw-r--r--lib/zstd/common/zstd_internal.h153
18 files changed, 697 insertions, 737 deletions
diff --git a/lib/zstd/common/allocations.h b/lib/zstd/common/allocations.h
new file mode 100644
index 000000000000..16c3d08e8d1a
--- /dev/null
+++ b/lib/zstd/common/allocations.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This file provides custom allocation primitives
+ */
+
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+
+#include "compiler.h" /* MEM_STATIC */
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h> /* ZSTD_customMem */
+
+#ifndef ZSTD_ALLOCATIONS_H
+#define ZSTD_ALLOCATIONS_H
+
+/* custom memory allocation functions */
+
+MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc)
+ return customMem.customAlloc(customMem.opaque, size);
+ return ZSTD_malloc(size);
+}
+
+MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+ if (customMem.customAlloc) {
+ /* calloc implemented as malloc+memset;
+ * not as efficient as calloc, but next best guess for custom malloc */
+ void* const ptr = customMem.customAlloc(customMem.opaque, size);
+ ZSTD_memset(ptr, 0, size);
+ return ptr;
+ }
+ return ZSTD_calloc(1, size);
+}
+
+MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+ if (ptr!=NULL) {
+ if (customMem.customFree)
+ customMem.customFree(customMem.opaque, ptr);
+ else
+ ZSTD_free(ptr);
+ }
+}
+
+#endif /* ZSTD_ALLOCATIONS_H */
diff --git a/lib/zstd/common/bits.h b/lib/zstd/common/bits.h
new file mode 100644
index 000000000000..c5faaa3d7b08
--- /dev/null
+++ b/lib/zstd/common/bits.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_BITS_H
+#define ZSTD_BITS_H
+
+#include "mem.h"
+
+MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
+{
+ assert(val != 0);
+ {
+ static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
+ 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7,
+ 26, 12, 18, 6, 11, 5, 10, 9};
+ return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
+ }
+}
+
+MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
+{
+ assert(val != 0);
+#if (__GNUC__ >= 4)
+ return (unsigned)__builtin_ctz(val);
+#else
+ return ZSTD_countTrailingZeros32_fallback(val);
+#endif
+}
+
+MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
+{
+ assert(val != 0);
+ {
+ static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
+ 11, 14, 16, 18, 22, 25, 3, 30,
+ 8, 12, 20, 28, 15, 17, 24, 7,
+ 19, 27, 23, 6, 26, 5, 4, 31};
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
+ }
+}
+
+MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
+{
+ assert(val != 0);
+#if (__GNUC__ >= 4)
+ return (unsigned)__builtin_clz(val);
+#else
+ return ZSTD_countLeadingZeros32_fallback(val);
+#endif
+}
+
+MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
+{
+ assert(val != 0);
+#if (__GNUC__ >= 4) && defined(__LP64__)
+ return (unsigned)__builtin_ctzll(val);
+#else
+ {
+ U32 mostSignificantWord = (U32)(val >> 32);
+ U32 leastSignificantWord = (U32)val;
+ if (leastSignificantWord == 0) {
+ return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
+ } else {
+ return ZSTD_countTrailingZeros32(leastSignificantWord);
+ }
+ }
+#endif
+}
+
+MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
+{
+ assert(val != 0);
+#if (__GNUC__ >= 4)
+ return (unsigned)(__builtin_clzll(val));
+#else
+ {
+ U32 mostSignificantWord = (U32)(val >> 32);
+ U32 leastSignificantWord = (U32)val;
+ if (mostSignificantWord == 0) {
+ return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
+ } else {
+ return ZSTD_countLeadingZeros32(mostSignificantWord);
+ }
+ }
+#endif
+}
+
+MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
+{
+ if (MEM_isLittleEndian()) {
+ if (MEM_64bits()) {
+ return ZSTD_countTrailingZeros64((U64)val) >> 3;
+ } else {
+ return ZSTD_countTrailingZeros32((U32)val) >> 3;
+ }
+ } else { /* Big Endian CPU */
+ if (MEM_64bits()) {
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
+ } else {
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
+ }
+ }
+}
+
+MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
+{
+ assert(val != 0);
+ return 31 - ZSTD_countLeadingZeros32(val);
+}
+
+/* ZSTD_rotateRight_*():
+ * Rotates a bitfield to the right by "count" bits.
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
+ */
+MEM_STATIC
+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
+ assert(count < 64);
+ count &= 0x3F; /* for fickle pattern recognition */
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
+}
+
+MEM_STATIC
+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
+ assert(count < 32);
+ count &= 0x1F; /* for fickle pattern recognition */
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
+}
+
+MEM_STATIC
+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
+ assert(count < 16);
+ count &= 0x0F; /* for fickle pattern recognition */
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+}
+
+#endif /* ZSTD_BITS_H */
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
index feef3a1b1d60..86439da0eea7 100644
--- a/lib/zstd/common/bitstream.h
+++ b/lib/zstd/common/bitstream.h
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/* ******************************************************************
* bitstream
* Part of FSE library
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
*
* You can contact the author at :
* - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -27,7 +28,7 @@
#include "compiler.h" /* UNLIKELY() */
#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
#include "error_private.h" /* error codes and messages */
-
+#include "bits.h" /* ZSTD_highbit32 */
/*=========================================
* Target specific
@@ -41,12 +42,13 @@
/*-******************************************
* bitStream encoding API (write forward)
********************************************/
+typedef size_t BitContainerType;
/* bitStream can mix input from multiple sources.
* A critical property of these streams is that they encode and decode in **reverse** direction.
* So the first bit sequence you add will be the last to be read, like a LIFO stack.
*/
typedef struct {
- size_t bitContainer;
+ BitContainerType bitContainer;
unsigned bitPos;
char* startPtr;
char* ptr;
@@ -54,7 +56,7 @@ typedef struct {
} BIT_CStream_t;
MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
-MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
@@ -63,7 +65,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
*
* bits are first added to a local register.
-* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+* Local register is BitContainerType, 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
* Writing data into memory is an explicit operation, performed by the flushBits function.
* Hence keep track how many bits are potentially stored into local register to avoid register overflow.
* After a flushBits, a maximum of 7 bits might still be stored into local register.
@@ -80,28 +82,28 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
* bitStream decoding API (read backward)
**********************************************/
typedef struct {
- size_t bitContainer;
+ BitContainerType bitContainer;
unsigned bitsConsumed;
const char* ptr;
const char* start;
const char* limitPtr;
} BIT_DStream_t;
-typedef enum { BIT_DStream_unfinished = 0,
- BIT_DStream_endOfBuffer = 1,
- BIT_DStream_completed = 2,
- BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
- /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */
+ BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
+ BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */
+ BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */
+ } BIT_DStream_status; /* result of BIT_reloadDStream() */
MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
-MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/* Start by invoking BIT_initDStream().
* A chunk of the bitStream is then stored into a local register.
-* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
* You can then retrieve bitFields stored into the local register, **in reverse order**.
* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -113,7 +115,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
/*-****************************************
* unsafe API
******************************************/
-MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, BitContainerType value, unsigned nbBits);
/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
@@ -122,33 +124,6 @@ MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
/* faster, but works only if nbBits >= 1 */
-
-
-/*-**************************************************************
-* Internal functions
-****************************************************************/
-MEM_STATIC unsigned BIT_highbit32 (U32 val)
-{
- assert(val != 0);
- {
-# if (__GNUC__ >= 3) /* Use GCC Intrinsic */
- return __builtin_clz (val) ^ 31;
-# else /* Software version */
- static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
- 11, 14, 16, 18, 22, 25, 3, 30,
- 8, 12, 20, 28, 15, 17, 24, 7,
- 19, 27, 23, 6, 26, 5, 4, 31 };
- U32 v = val;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
-# endif
- }
-}
-
/*===== Local Constants =====*/
static const unsigned BIT_mask[] = {
0, 1, 3, 7, 0xF, 0x1F,
@@ -178,16 +153,22 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
return 0;
}
+FORCE_INLINE_TEMPLATE BitContainerType BIT_getLowerBits(BitContainerType bitContainer, U32 const nbBits)
+{
+ assert(nbBits < BIT_MASK_SIZE);
+ return bitContainer & BIT_mask[nbBits];
+}
+
/*! BIT_addBits() :
* can add up to 31 bits into `bitC`.
* Note : does not check for register overflow ! */
MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
- size_t value, unsigned nbBits)
+ BitContainerType value, unsigned nbBits)
{
DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
assert(nbBits < BIT_MASK_SIZE);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
- bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+ bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos;
bitC->bitPos += nbBits;
}
@@ -195,7 +176,7 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
* works only if `value` is _clean_,
* meaning all high bits above nbBits are 0 */
MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
- size_t value, unsigned nbBits)
+ BitContainerType value, unsigned nbBits)
{
assert((value>>nbBits) == 0);
assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
@@ -242,7 +223,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
BIT_addBitsFast(bitC, 1, 1); /* endMark */
BIT_flushBits(bitC);
if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
- return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+ return (size_t)(bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
}
@@ -266,35 +247,35 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
bitD->bitContainer = MEM_readLEST(bitD->ptr);
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
} else {
bitD->ptr = bitD->start;
bitD->bitContainer = *(const BYTE*)(bitD->start);
switch(srcSize)
{
- case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+ case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
ZSTD_FALLTHROUGH;
- case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+ case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
ZSTD_FALLTHROUGH;
- case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+ case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
ZSTD_FALLTHROUGH;
- case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+ case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
ZSTD_FALLTHROUGH;
- case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+ case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
ZSTD_FALLTHROUGH;
- case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
+ case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8;
ZSTD_FALLTHROUGH;
default: break;
}
{ BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
- bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+ bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0;
if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
}
bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
@@ -303,12 +284,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
return srcSize;
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+FORCE_INLINE_TEMPLATE BitContainerType BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
{
return bitContainer >> start;
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+FORCE_INLINE_TEMPLATE BitContainerType BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
{
U32 const regMask = sizeof(bitContainer)*8 - 1;
/* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -318,26 +299,20 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
* such cpus old (pre-Haswell, 2013) and their performance is not of that
* importance.
*/
-#if defined(__x86_64__) || defined(_M_X86)
+#if defined(__x86_64__) || defined(_M_X64)
return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
#else
return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
#endif
}
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
-{
- assert(nbBits < BIT_MASK_SIZE);
- return bitContainer & BIT_mask[nbBits];
-}
-
/*! BIT_lookBits() :
* Provides next n bits from local register.
* local register is not modified.
* On 32-bits, maxNbBits==24.
* On 64-bits, maxNbBits==56.
* @return : value extracted */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE BitContainerType BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
{
/* arbitrate between double-shift and shift+mask */
#if 1
@@ -353,14 +328,14 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U3
/*! BIT_lookBitsFast() :
* unsafe version; only works if nbBits >= 1 */
-MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+MEM_STATIC BitContainerType BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
{
U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
assert(nbBits >= 1);
return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
}
-MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
{
bitD->bitsConsumed += nbBits;
}
@@ -369,23 +344,38 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
* Read (consume) next n bits from local register and update.
* Pay attention to not read more than nbBits contained into local register.
* @return : extracted value. */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+FORCE_INLINE_TEMPLATE BitContainerType BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
{
- size_t const value = BIT_lookBits(bitD, nbBits);
+ BitContainerType const value = BIT_lookBits(bitD, nbBits);
BIT_skipBits(bitD, nbBits);
return value;
}
/*! BIT_readBitsFast() :
- * unsafe version; only works only if nbBits >= 1 */
-MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+ * unsafe version; only works if nbBits >= 1 */
+MEM_STATIC BitContainerType BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
{
- size_t const value = BIT_lookBitsFast(bitD, nbBits);
+ BitContainerType const value = BIT_lookBitsFast(bitD, nbBits);
assert(nbBits >= 1);
BIT_skipBits(bitD, nbBits);
return value;
}
+/*! BIT_reloadDStream_internal() :
+ * Simple variant of BIT_reloadDStream(), with two conditions:
+ * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
+ * 2. look window is valid after shifted down : bitD->ptr >= bitD->start
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
+{
+ assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+ bitD->ptr -= bitD->bitsConsumed >> 3;
+ assert(bitD->ptr >= bitD->start);
+ bitD->bitsConsumed &= 7;
+ bitD->bitContainer = MEM_readLEST(bitD->ptr);
+ return BIT_DStream_unfinished;
+}
+
/*! BIT_reloadDStreamFast() :
* Similar to BIT_reloadDStream(), but with two differences:
* 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -396,31 +386,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
{
if (UNLIKELY(bitD->ptr < bitD->limitPtr))
return BIT_DStream_overflow;
- assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
- bitD->ptr -= bitD->bitsConsumed >> 3;
- bitD->bitsConsumed &= 7;
- bitD->bitContainer = MEM_readLEST(bitD->ptr);
- return BIT_DStream_unfinished;
+ return BIT_reloadDStream_internal(bitD);
}
/*! BIT_reloadDStream() :
* Refill `bitD` from buffer previously set in BIT_initDStream() .
- * This function is safe, it guarantees it will not read beyond src buffer.
+ * This function is safe, it guarantees it will not never beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
* when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
+ /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
+ if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
+ static const BitContainerType zeroFilled = 0;
+ bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
+ /* overflow detected, erroneous scenario or end of stream: no update */
return BIT_DStream_overflow;
+ }
+
+ assert(bitD->ptr >= bitD->start);
if (bitD->ptr >= bitD->limitPtr) {
- return BIT_reloadDStreamFast(bitD);
+ return BIT_reloadDStream_internal(bitD);
}
if (bitD->ptr == bitD->start) {
+ /* reached end of bitStream => no update */
if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
return BIT_DStream_completed;
}
- /* start < ptr < limitPtr */
+ /* start < ptr < limitPtr => cautious update */
{ U32 nbBytes = bitD->bitsConsumed >> 3;
BIT_DStream_status result = BIT_DStream_unfinished;
if (bitD->ptr - nbBytes < bitD->start) {
@@ -442,5 +436,4 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
}
-
#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
index c42d39faf9bd..dc9bd15e174e 100644
--- a/lib/zstd/common/compiler.h
+++ b/lib/zstd/common/compiler.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
/*
- * Copyright (c) Yann Collet, Facebook, Inc.
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
@@ -11,6 +12,8 @@
#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H
+#include <linux/types.h>
+
#include "portability_macros.h"
/*-*******************************************************
@@ -41,12 +44,15 @@
*/
#define WIN_CDECL
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#define UNUSED_ATTR __attribute__((unused))
+
/*
* FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
* parameters. They must be inlined for the compiler to eliminate the constant
* branches.
*/
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
/*
* HINT_INLINE is used to help the compiler generate better code. It is *not*
* used for "templates", so it can be tweaked based on the compilers
@@ -61,11 +67,21 @@
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
# define HINT_INLINE static INLINE_KEYWORD
#else
-# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+# define HINT_INLINE FORCE_INLINE_TEMPLATE
#endif
-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
-#define UNUSED_ATTR __attribute__((unused))
+/* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function in included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
+#define MEM_STATIC static __inline UNUSED_ATTR
+#endif
/* force no inlining */
#define FORCE_NOINLINE static __attribute__((__noinline__))
@@ -86,23 +102,24 @@
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#elif defined(__aarch64__)
-# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
#else
-# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
-# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#endif /* NO_PREFETCH */
#define CACHELINE_SIZE 64
-#define PREFETCH_AREA(p, s) { \
- const char* const _ptr = (const char*)(p); \
- size_t const _size = (size_t)(s); \
- size_t _pos; \
- for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
- PREFETCH_L2(_ptr + _pos); \
- } \
-}
+#define PREFETCH_AREA(p, s) \
+ do { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH_L2(_ptr + _pos); \
+ } \
+ } while (0)
/* vectorization
* older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@@ -126,16 +143,13 @@
#define UNLIKELY(x) (__builtin_expect((x), 0))
#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
-# define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
#else
-# define ZSTD_UNREACHABLE { assert(0); }
+# define ZSTD_UNREACHABLE do { assert(0); } while (0)
#endif
/* disable warnings */
-/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
-
-
/* compile time determination of SIMD support */
/* C-language Attributes are added in C23. */
@@ -158,9 +172,15 @@
#define ZSTD_FALLTHROUGH fallthrough
/*-**************************************************************
-* Alignment check
+* Alignment
*****************************************************************/
+/* @return 1 if @u is a 2^n value, 0 otherwise
+ * useful to check a value is valid for alignment restrictions */
+MEM_STATIC int ZSTD_isPower2(size_t u) {
+ return (u & (u-1)) == 0;
+}
+
/* this test was initially positioned in mem.h,
* but this file is removed (or replaced) for linux kernel
* so it's now hosted in compiler.h,
@@ -175,10 +195,95 @@
#endif /* ZSTD_ALIGNOF */
+#ifndef ZSTD_ALIGNED
+/* C90-compatible alignment macro (GCC/Clang). Adjust for other compilers if needed. */
+#define ZSTD_ALIGNED(a) __attribute__((aligned(a)))
+#endif /* ZSTD_ALIGNED */
+
+
/*-**************************************************************
* Sanitizer
*****************************************************************/
+/*
+ * Zstd relies on pointer overflow in its decompressor