summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKonstantin Komarov <almaz.alexandrovich@paragon-software.com>2021-08-13 17:21:30 +0300
committerKonstantin Komarov <almaz.alexandrovich@paragon-software.com>2021-08-13 07:56:00 -0700
commit522e010b58379fbe19b38fdef5016bca0c3cf405 (patch)
treec04cdf90dc773687fa32513e822f38b30dc9c3e1
parentbe71b5cba2e6485e8959da7a9f9a44461a1bb074 (diff)
downloadlinux-522e010b58379fbe19b38fdef5016bca0c3cf405.tar.gz
linux-522e010b58379fbe19b38fdef5016bca0c3cf405.tar.bz2
linux-522e010b58379fbe19b38fdef5016bca0c3cf405.zip
fs/ntfs3: Add compression
This patch adds different types of NTFS-applicable compressions: - lznt - lzx - xpress Latter two (lzx, xpress) implement Windows Compact OS feature and were taken from ntfs-3g system comression plugin authored by Eric Biggers (https://github.com/ebiggers/ntfs-3g-system-compression) which were ported to ntfs3 and adapted to Linux Kernel environment. Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-rw-r--r--fs/ntfs3/lib/decompress_common.c332
-rw-r--r--fs/ntfs3/lib/decompress_common.h352
-rw-r--r--fs/ntfs3/lib/lib.h26
-rw-r--r--fs/ntfs3/lib/lzx_decompress.c683
-rw-r--r--fs/ntfs3/lib/xpress_decompress.c155
-rw-r--r--fs/ntfs3/lznt.c452
6 files changed, 2000 insertions, 0 deletions
diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c
new file mode 100644
index 000000000000..83c9e93aea77
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * decompress_common.c - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+
+/*
+ * make_huffman_decode_table() -
+ *
+ * Build a decoding table for a canonical prefix code, or "Huffman code".
+ *
+ * This is an internal function, not part of the library API!
+ *
+ * This takes as input the length of the codeword for each symbol in the
+ * alphabet and produces as output a table that can be used for fast
+ * decoding of prefix-encoded symbols using read_huffsym().
+ *
+ * Strictly speaking, a canonical prefix code might not be a Huffman
+ * code. But this algorithm will work either way; and in fact, since
+ * Huffman codes are defined in terms of symbol frequencies, there is no
+ * way for the decompressor to know whether the code is a true Huffman
+ * code or not until all symbols have been decoded.
+ *
+ * Because the prefix code is assumed to be "canonical", it can be
+ * reconstructed directly from the codeword lengths. A prefix code is
+ * canonical if and only if a longer codeword never lexicographically
+ * precedes a shorter codeword, and the lexicographic ordering of
+ * codewords of the same length is the same as the lexicographic ordering
+ * of the corresponding symbols. Consequently, we can sort the symbols
+ * primarily by codeword length and secondarily by symbol value, then
+ * reconstruct the prefix code by generating codewords lexicographically
+ * in that order.
+ *
+ * This function does not, however, generate the prefix code explicitly.
+ * Instead, it directly builds a table for decoding symbols using the
+ * code. The basic idea is this: given the next 'max_codeword_len' bits
+ * in the input, we can look up the decoded symbol by indexing a table
+ * containing 2**max_codeword_len entries. A codeword with length
+ * 'max_codeword_len' will have exactly one entry in this table, whereas
+ * a codeword shorter than 'max_codeword_len' will have multiple entries
+ * in this table. Precisely, a codeword of length n will be represented
+ * by 2**(max_codeword_len - n) entries in this table. The 0-based index
+ * of each such entry will contain the corresponding codeword as a prefix
+ * when zero-padded on the left to 'max_codeword_len' binary digits.
+ *
+ * That's the basic idea, but we implement two optimizations regarding
+ * the format of the decode table itself:
+ *
+ * - For many compression formats, the maximum codeword length is too
+ * long for it to be efficient to build the full decoding table
+ * whenever a new prefix code is used. Instead, we can build the table
+ * using only 2**table_bits entries, where 'table_bits' is some number
+ * less than or equal to 'max_codeword_len'. Then, only codewords of
+ * length 'table_bits' and shorter can be directly looked up. For
+ * longer codewords, the direct lookup instead produces the root of a
+ * binary tree. Using this tree, the decoder can do traditional
+ * bit-by-bit decoding of the remainder of the codeword. Child nodes
+ * are allocated in extra entries at the end of the table; leaf nodes
+ * contain symbols. Note that the long-codeword case is, in general,
+ * not performance critical, since in Huffman codes the most frequently
+ * used symbols are assigned the shortest codeword lengths.
+ *
+ * - When we decode a symbol using a direct lookup of the table, we still
+ * need to know its length so that the bitstream can be advanced by the
+ * appropriate number of bits. The simple solution is to simply retain
+ * the 'lens' array and use the decoded symbol as an index into it.
+ * However, this requires two separate array accesses in the fast path.
+ * The optimization is to store the length directly in the decode
+ * table. We use the bottom 11 bits for the symbol and the top 5 bits
+ * for the length. In addition, to combine this optimization with the
+ * previous one, we introduce a special case where the top 2 bits of
+ * the length are both set if the entry is actually the root of a
+ * binary tree.
+ *
+ * @decode_table:
+ * The array in which to create the decoding table. This must have
+ * a length of at least ((2**table_bits) + 2 * num_syms) entries.
+ *
+ * @num_syms:
+ * The number of symbols in the alphabet; also, the length of the
+ * 'lens' array. Must be less than or equal to 2048.
+ *
+ * @table_bits:
+ * The order of the decode table size, as explained above. Must be
+ * less than or equal to 13.
+ *
+ * @lens:
+ * An array of length @num_syms, indexable by symbol, that gives the
+ * length of the codeword, in bits, for that symbol. The length can
+ * be 0, which means that the symbol does not have a codeword
+ * assigned.
+ *
+ * @max_codeword_len:
+ * The longest codeword length allowed in the compression format.
+ * All entries in 'lens' must be less than or equal to this value.
+ * This must be less than or equal to 23.
+ *
+ * @working_space
+ * A temporary array of length '2 * (max_codeword_len + 1) +
+ * num_syms'.
+ *
+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix
+ * code.
+ */
+int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+ const u32 table_bits, const u8 lens[],
+ const u32 max_codeword_len,
+ u16 working_space[])
+{
+ const u32 table_num_entries = 1 << table_bits;
+ u16 * const len_counts = &working_space[0];
+ u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
+ u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
+ int left;
+ void *decode_table_ptr;
+ u32 sym_idx;
+ u32 codeword_len;
+ u32 stores_per_loop;
+ u32 decode_table_pos;
+ u32 len;
+ u32 sym;
+
+ /* Count how many symbols have each possible codeword length.
+ * Note that a length of 0 indicates the corresponding symbol is not
+ * used in the code and therefore does not have a codeword.
+ */
+ for (len = 0; len <= max_codeword_len; len++)
+ len_counts[len] = 0;
+ for (sym = 0; sym < num_syms; sym++)
+ len_counts[lens[sym]]++;
+
+ /* We can assume all lengths are <= max_codeword_len, but we
+ * cannot assume they form a valid prefix code. A codeword of
+ * length n should require a proportion of the codespace equaling
+ * (1/2)^n. The code is valid if and only if the codespace is
+ * exactly filled by the lengths, by this measure.
+ */
+ left = 1;
+ for (len = 1; len <= max_codeword_len; len++) {
+ left <<= 1;
+ left -= len_counts[len];
+ if (left < 0) {
+ /* The lengths overflow the codespace; that is, the code
+ * is over-subscribed.
+ */
+ return -1;
+ }
+ }
+
+ if (left) {
+ /* The lengths do not fill the codespace; that is, they form an
+ * incomplete set.
+ */
+ if (left == (1 << max_codeword_len)) {
+ /* The code is completely empty. This is arguably
+ * invalid, but in fact it is valid in LZX and XPRESS,
+ * so we must allow it. By definition, no symbols can
+ * be decoded with an empty code. Consequently, we
+ * technically don't even need to fill in the decode
+ * table. However, to avoid accessing uninitialized
+ * memory if the algorithm nevertheless attempts to
+ * decode symbols using such a code, we zero out the
+ * decode table.
+ */
+ memset(decode_table, 0,
+ table_num_entries * sizeof(decode_table[0]));
+ return 0;
+ }
+ return -1;
+ }
+
+ /* Sort the symbols primarily by length and secondarily by symbol order.
+ */
+
+ /* Initialize 'offsets' so that offsets[len] for 1 <= len <=
+ * max_codeword_len is the number of codewords shorter than 'len' bits.
+ */
+ offsets[1] = 0;
+ for (len = 1; len < max_codeword_len; len++)
+ offsets[len + 1] = offsets[len] + len_counts[len];
+
+ /* Use the 'offsets' array to sort the symbols. Note that we do not
+ * include symbols that are not used in the code. Consequently, fewer
+ * than 'num_syms' entries in 'sorted_syms' may be filled.
+ */
+ for (sym = 0; sym < num_syms; sym++)
+ if (lens[sym])
+ sorted_syms[offsets[lens[sym]]++] = sym;
+
+ /* Fill entries for codewords with length <= table_bits
+ * --- that is, those short enough for a direct mapping.
+ *
+ * The table will start with entries for the shortest codeword(s), which
+ * have the most entries. From there, the number of entries per
+ * codeword will decrease.
+ */
+ decode_table_ptr = decode_table;
+ sym_idx = 0;
+ codeword_len = 1;
+ stores_per_loop = (1 << (table_bits - codeword_len));
+ for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+ for (; sym_idx < end_sym_idx; sym_idx++) {
+ u16 entry;
+ u16 *p;
+ u32 n;
+
+ entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
+ p = (u16 *)decode_table_ptr;
+ n = stores_per_loop;
+
+ do {
+ *p++ = entry;
+ } while (--n);
+
+ decode_table_ptr = p;
+ }
+ }
+
+ /* If we've filled in the entire table, we are done. Otherwise,
+ * there are codewords longer than table_bits for which we must
+ * generate binary trees.
+ */
+ decode_table_pos = (u16 *)decode_table_ptr - decode_table;
+ if (decode_table_pos != table_num_entries) {
+ u32 j;
+ u32 next_free_tree_slot;
+ u32 cur_codeword;
+
+ /* First, zero out the remaining entries. This is
+ * necessary so that these entries appear as
+ * "unallocated" in the next part. Each of these entries
+ * will eventually be filled with the representation of
+ * the root node of a binary tree.
+ */
+ j = decode_table_pos;
+ do {
+ decode_table[j] = 0;
+ } while (++j != table_num_entries);
+
+ /* We allocate child nodes starting at the end of the
+ * direct lookup table. Note that there should be
+ * 2*num_syms extra entries for this purpose, although
+ * fewer than this may actually be needed.
+ */
+ next_free_tree_slot = table_num_entries;
+
+ /* Iterate through each codeword with length greater than
+ * 'table_bits', primarily in order of codeword length
+ * and secondarily in order of symbol.
+ */
+ for (cur_codeword = decode_table_pos << 1;
+ codeword_len <= max_codeword_len;
+ codeword_len++, cur_codeword <<= 1) {
+ u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+ for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
+ /* 'sorted_sym' is the symbol represented by the
+ * codeword.
+ */
+ u32 sorted_sym = sorted_syms[sym_idx];
+ u32 extra_bits = codeword_len - table_bits;
+ u32 node_idx = cur_codeword >> extra_bits;
+
+ /* Go through each bit of the current codeword
+ * beyond the prefix of length @table_bits and
+ * walk the appropriate binary tree, allocating
+ * any slots that have not yet been allocated.
+ *
+ * Note that the 'pointer' entry to the binary
+ * tree, which is stored in the direct lookup
+ * portion of the table, is represented
+ * identically to other internal (non-leaf)
+ * nodes of the binary tree; it can be thought
+ * of as simply the root of the tree. The
+ * representation of these internal nodes is
+ * simply the index of the left child combined
+ * with the special bits 0xC000 to distingush
+ * the entry from direct mapping and leaf node
+ * entries.
+ */
+ do {
+ /* At least one bit remains in the
+ * codeword, but the current node is an
+ * unallocated leaf. Change it to an
+ * internal node.
+ */
+ if (decode_table[node_idx] == 0) {
+ decode_table[node_idx] =
+ next_free_tree_slot | 0xC000;
+ decode_table[next_free_tree_slot++] = 0;
+ decode_table[next_free_tree_slot++] = 0;
+ }
+
+ /* Go to the left child if the next bit
+ * in the codeword is 0; otherwise go to
+ * the right child.
+ */
+ node_idx = decode_table[node_idx] & 0x3FFF;
+ --extra_bits;
+ node_idx += (cur_codeword >> extra_bits) & 1;
+ } while (extra_bits != 0);
+
+ /* We've traversed the tree using the entire
+ * codeword, and we're now at the entry where
+ * the actual symbol will be stored. This is
+ * distinguished from internal nodes by not
+ * having its high two bits set.
+ */
+ decode_table[node_idx] = sorted_sym;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h
new file mode 100644
index 000000000000..66297f398403
--- /dev/null
+++ b/fs/ntfs3/lib/decompress_common.h
@@ -0,0 +1,352 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * decompress_common.h - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+
+/* "Force inline" macro (not required, but helpful for performance) */
+#define forceinline __always_inline
+
+/* Enable whole-word match copying on selected architectures */
+#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
+# define FAST_UNALIGNED_ACCESS
+#endif
+
+/* Size of a machine word */
+#define WORDBYTES (sizeof(size_t))
+
+static forceinline void
+copy_unaligned_word(const void *src, void *dst)
+{
+ put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
+}
+
+
+/* Generate a "word" with platform-dependent size whose bytes all contain the
+ * value 'b'.
+ */
+static forceinline size_t repeat_byte(u8 b)
+{
+ size_t v;
+
+ v = b;
+ v |= v << 8;
+ v |= v << 16;
+ v |= v << ((WORDBYTES == 8) ? 32 : 0);
+ return v;
+}
+
+/* Structure that encapsulates a block of in-memory data being interpreted as a
+ * stream of bits, optionally with interwoven literal bytes. Bits are assumed
+ * to be stored in little endian 16-bit coding units, with the bits ordered high
+ * to low.
+ */
+struct input_bitstream {
+
+ /* Bits that have been read from the input buffer. The bits are
+ * left-justified; the next bit is always bit 31.
+ */
+ u32 bitbuf;
+
+ /* Number of bits currently held in @bitbuf. */
+ u32 bitsleft;
+
+ /* Pointer to the next byte to be retrieved from the input buffer. */
+ const u8 *next;
+
+ /* Pointer to just past the end of the input buffer. */
+ const u8 *end;
+};
+
+/* Initialize a bitstream to read from the specified input buffer. */
+static forceinline void init_input_bitstream(struct input_bitstream *is,
+ const void *buffer, u32 size)
+{
+ is->bitbuf = 0;
+ is->bitsleft = 0;
+ is->next = buffer;
+ is->end = is->next + size;
+}
+
+/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
+ * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
+ * may be called on the bitstream to peek or remove up to @num_bits bits. Note
+ * that @num_bits must be <= 16.
+ */
+static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
+ u32 num_bits)
+{
+ if (is->bitsleft < num_bits) {
+ if (is->end - is->next >= 2) {
+ is->bitbuf |= (u32)get_unaligned_le16(is->next)
+ << (16 - is->bitsleft);
+ is->next += 2;
+ }
+ is->bitsleft += 16;
+ }
+}
+
+/* Return the next @num_bits bits from the bitstream, without removing them.
+ * There must be at least @num_bits remaining in the buffer variable, from a
+ * previous call to bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
+{
+ return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
+}
+
+/* Remove @num_bits from the bitstream. There must be at least @num_bits
+ * remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline void
+bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
+{
+ is->bitbuf <<= num_bits;
+ is->bitsleft -= num_bits;
+}
+
+/* Remove and return @num_bits bits from the bitstream. There must be at least
+ * @num_bits remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
+{
+ u32 bits = bitstream_peek_bits(is, num_bits);
+
+ bitstream_remove_bits(is, num_bits);
+ return bits;
+}
+
+/* Read and return the next @num_bits bits from the bitstream. */
+static forceinline u32
+bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
+{
+ bitstream_ensure_bits(is, num_bits);
+ return bitstream_pop_bits(is, num_bits);
+}
+
+/* Read and return the next literal byte embedded in the bitstream. */
+static forceinline u8
+bitstream_read_byte(struct input_bitstream *is)
+{
+ if (unlikely(is->end == is->next))
+ return 0;
+ return *is->next++;
+}
+
+/* Read and return the next 16-bit integer embedded in the bitstream. */
+static forceinline u16
+bitstream_read_u16(struct input_bitstream *is)
+{
+ u16 v;
+
+ if (unlikely(is->end - is->next < 2))
+ return 0;
+ v = get_unaligned_le16(is->next);
+ is->next += 2;
+ return v;
+}
+
+/* Read and return the next 32-bit integer embedded in the bitstream. */
+static forceinline u32
+bitstream_read_u32(struct input_bitstream *is)
+{
+ u32 v;
+
+ if (unlikely(is->end - is->next < 4))
+ return 0;
+ v = get_unaligned_le32(is->next);
+ is->next += 4;
+ return v;
+}
+
+/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
+ * Return either a pointer to the byte past the last written, or NULL if the
+ * read overflows the input buffer.
+ */
+static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
+ void *dst_buffer, size_t count)
+{
+ if ((size_t)(is->end - is->next) < count)
+ return NULL;
+ memcpy(dst_buffer, is->next, count);
+ is->next += count;
+ return (u8 *)dst_buffer + count;
+}
+
+/* Align the input bitstream on a coding-unit boundary. */
+static forceinline void bitstream_align(struct input_bitstream *is)
+{
+ is->bitsleft = 0;
+ is->bitbuf = 0;
+}
+
+extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+ const u32 num_bits, const u8 lens[],
+ const u32 max_codeword_len,
+ u16 working_space[]);
+
+
+/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the
+ * input data is exhausted, the Huffman symbol is decoded as if the missing bits
+ * are all zeroes.
+ */
+static forceinline u32 read_huffsym(struct input_bitstream *istream,
+ const u16 decode_table[],
+ u32 table_bits,
+ u32 max_codeword_len)
+{
+ u32 entry;
+ u32 key_bits;
+
+ bitstream_ensure_bits(istream, max_codeword_len);
+
+ /* Index the decode table by the next table_bits bits of the input. */
+ key_bits = bitstream_peek_bits(istream, table_bits);
+ entry = decode_table[key_bits];
+ if (entry < 0xC000) {
+ /* Fast case: The decode table directly provided the
+ * symbol and codeword length. The low 11 bits are the
+ * symbol, and the high 5 bits are the codeword length.
+ */
+ bitstream_remove_bits(istream, entry >> 11);
+ return entry & 0x7FF;
+ }
+ /* Slow case: The codeword for the symbol is longer than
+ * table_bits, so the symbol does not have an entry
+ * directly in the first (1 << table_bits) entries of the
+ * decode table. Traverse the appropriate binary tree
+ * bit-by-bit to decode the symbol.
+ */
+ bitstream_remove_bits(istream, table_bits);
+ do {
+ key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
+ } while ((entry = decode_table[key_bits]) >= 0xC000);
+ return entry;
+}
+
+/*
+ * Copy an LZ77 match at (dst - offset) to dst.
+ *
+ * The length and offset must be already validated --- that is, (dst - offset)
+ * can't underrun the output buffer, and (dst + length) can't overrun the output
+ * buffer. Also, the length cannot be 0.
+ *
+ * @bufend points to the byte past the end of the output buffer. This function
+ * won't write any data beyond this position.
+ *
+ * Returns dst + length.
+ */
+static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
+ u32 min_length)
+{
+ const u8 *src = dst - offset;
+
+ /*
+ * Try to copy one machine word at a time. On i386 and x86_64 this is
+ * faster than copying one byte at a time, unless the data is
+ * near-random and all the matches have very short lengths. Note that
+ * since this requires unaligned memory accesses, it won't necessarily
+ * be faster on every architecture.
+ *
+ * Also note that we might copy more than the length of the match. For
+ * example, if a word is 8 bytes and the match is of length 5, then
+ * we'll simply copy 8 bytes. This is okay as long as we don't write
+ * beyond the end of the output buffer, hence the check for (bufend -
+ * end >= WORDBYTES - 1).
+ */
+#ifdef FAST_UNALIGNED_ACCESS
+ u8 * const end = dst + length;
+
+ if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
+
+ if (offset >= WORDBYTES) {
+ /* The source and destination words don't overlap. */
+
+ /* To improve branch prediction, one iteration of this
+ * loop is unrolled. Most matches are short and will
+ * fail the first check. But if that check passes, then
+ * it becomes increasing likely that the match is long
+ * and we'll need to continue copying.
+ */
+
+ copy_unaligned_word(src, dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+
+ if (dst < end) {
+ do {
+ copy_unaligned_word(src, dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ } while (dst < end);
+ }
+ return end;
+ } else if (offset == 1) {
+
+ /* Offset 1 matches are equivalent to run-length
+ * encoding of the previous byte. This case is common
+ * if the data contains many repeated bytes.
+ */
+ size_t v = repeat_byte(*(dst - 1));
+
+ do {
+ put_unaligned(v, (size_t *)dst);
+ src += WORDBYTES;
+ dst += WORDBYTES;
+ } while (dst < end);
+ return end;
+ }
+ /*
+ * We don't bother with special cases for other 'offset <
+ * WORDBYTES', which are usually rarer than 'offset == 1'. Extra
+ * checks will just slow things down. Actually, it's possible
+ * to handle all the 'offset < WORDBYTES' cases using the same
+ * code, but it still becomes more complicated doesn't seem any
+ * faster overall; it definitely slows down the more common
+ * 'offset == 1' case.
+ */
+ }
+#endif /* FAST_UNALIGNED_ACCESS */
+
+ /* Fall back to a bytewise copy. */
+
+ if (min_length >= 2) {
+ *dst++ = *src++;
+ length--;
+ }
+ if (min_length >= 3) {
+ *dst++ = *src++;
+ length--;
+ }
+ do {
+ *dst++ = *src++;
+ } while (--length);
+
+ return dst;
+}
diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h
new file mode 100644
index 000000000000..f508fbad2e71
--- /dev/null
+++ b/fs/ntfs3/lib/lib.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Adapted for linux kernel by Alexander Mamaev:
+ * - remove implementations of get_unaligned_
+ * - assume GCC is always defined
+ * - ISO C90
+ * - linux kernel code style
+ */
+
+
+/* globals from xpress_decompress.c */
+struct xpress_decompressor *xpress_allocate_decompressor(void);
+void xpress_free_decompressor(struct xpress_decompressor *d);
+int xpress_decompress(struct xpress_decompressor *__restrict d,
+ const void *__restrict compressed_data,
+ size_t compressed_size,
+ void *__restrict uncompressed_data,
+ size_t uncompressed_size);
+
+/* globals from lzx_decompress.c */
+struct lzx_decompressor *lzx_allocate_decompressor(void);
+void lzx_free_decompressor(struct lzx_decompressor *d);
+int lzx_decompress(struct lzx_decompressor *__restrict d,
+ const void *__restrict compressed_data,
+ size_t compressed_size, void *__restrict uncompressed_data,
+ size_t uncompressed_size);
diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c
new file mode 100644
index 000000000000..77a381a693d1
--- /dev/null
+++ b/fs/ntfs3/lib/lzx_decompress.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * lzx_decompress.c - A decompressor for the LZX compression format, which can
+ * be used in "System Compressed" files. This is based on the code from wimlib.
+ * This code only supports a window size (dictionary size) of 32768 bytes, since
+ * this is the only size used in System Compression.
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+#include "lib.h"
+
+/* Number of literal byte values */
+#define LZX_NUM_CHARS 256
+
+/* The smallest and largest allowed match lengths */
+#define LZX_MIN_MATCH_LEN 2
+#define LZX_MAX_MATCH_LEN 257
+
+/* Number of distinct match lengths that can be represented */
+#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
+
+/* Number of match lengths for which no length symbol is required */
+#define LZX_NUM_PRIMARY_LENS 7
+#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1)
+
+/* Valid values of the 3-bit block type field */
+#define LZX_BLOCKTYPE_VERBATIM 1
+#define LZX_BLOCKTYPE_ALIGNED 2
+#define LZX_BLOCKTYPE_UNCOMPRESSED 3
+
+/* Number of offset slots for a window size of 32768 */
+#define LZX_NUM_OFFSET_SLOTS 30
+
+/* Number of symbols in the main code for a window size of 32768 */
+#define LZX_MAINCODE_NUM_SYMBOLS \
+ (LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
+
+/* Number of symbols in the length code */
+#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
+
+/* Number of symbols in the precode */
+#define LZX_PRECODE_NUM_SYMBOLS 20
+
+/* Number of bits in which each precode codeword length is represented */
+#define LZX_PRECODE_ELEMENT_SIZE 4
+
+/* Number of low-order bits of each match offset that are entropy-encoded in
+ * aligned offset blocks
+ */
+#define LZX_NUM_ALIGNED_OFFSET_BITS 3
+
+/* Number of symbols in the aligned offset code */
+#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS)
+
+/* Mask for the match offset bits that are entropy-encoded in aligned offset
+ * blocks
+ */
+#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
+
+/* Number of bits in which each aligned offset codeword length is represented */
+#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3
+
+/* Maximum lengths (in bits) of the codewords in each Huffman code */
+#define LZX_MAX_MAIN_CODEWORD_LEN 16
+#define LZX_MAX_LEN_CODEWORD_LEN 16
+#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
+#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
+
+/* The default "filesize" value used in pre/post-processing. In the LZX format
+ * used in cabinet files this value must be given to the decompressor, whereas
+ * in the LZX format used in WIM files and system-compressed files this value is
+ * fixed at 12000000.
+ */
+#define LZX_DEFAULT_FILESIZE 12000000
+
+/* Assumed block size when the encoded block size begins with a 0 bit. */
+#define LZX_DEFAULT_BLOCK_SIZE 32768
+
+/* Number of offsets in the recent (or "repeat") offsets queue. */
+#define LZX_NUM_RECENT_OFFSETS 3
+
+/* These values are chosen for fast decompression. */
+#define LZX_MAINCODE_TABLEBITS 11
+#define LZX_LENCODE_TABLEBITS 10
+#define LZX_PRECODE_TABLEBITS 6
+#define LZX_ALIGNEDCODE_TABLEBITS 7
+
+#define LZX_READ_LENS_MAX_OVERRUN 50
+
+/* Mapping: offset slot => first match offset that uses that offset slot.
+ */
+static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
+ 0, 1, 2, 3, 4, /* 0 --- 4 */
+ 6, 8, 12, 16, 24, /* 5 --- 9 */
+ 32, 48, 64, 96, 128, /* 10 --- 14 */
+ 192, 256, 384, 512, 768, /* 15 --- 19 */
+ 1024, 1536, 2048, 3072, 4096, /* 20 --- 24 */
+ 6144, 8192, 12288, 16384, 24576, /* 25 --- 29 */
+ 32768, /* extra */
+};
+
+/* Mapping: offset slot => how many extra bits must be read and added to the
+ * corresponding offset slot base to decode the match offset.
+ */
+static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
+ 0, 0, 0, 0, 1,
+ 1, 2, 2, 3, 3,
+ 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8,
+ 9, 9, 10, 10, 11,
+ 11, 12, 12, 13, 13,
+};
+
+/* Reusable heap-allocated memory for LZX decompression */
+struct lzx_decompressor {
+
+ /* Huffman decoding tables, and arrays that map symbols to codeword
+ * lengths
+ */
+
+ u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
+ (LZX_MAINCODE_NUM_SYMBOLS * 2)];
+ u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+ u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
+ (LZX_LENCODE_NUM_SYMBOLS * 2)];
+ u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
+
+
+ u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
+ (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
+ u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
+
+ u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
+ (LZX_PRECODE_NUM_SYMBOLS * 2)];
+ u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
+
+ /* Temporary space for make_huffman_decode_table() */
+ u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
+ LZX_MAINCODE_NUM_SYMBOLS];
+};
+
+static void undo_e8_translation(void *target, s32 input_pos)
+{
+ s32 abs_offset, rel_offset;
+
+ abs_offset = get_unaligned_le32(target);
+ if (abs_offset >= 0) {
+ if (abs_offset < LZX_DEFAULT_FILESIZE) {
+ /* "good translation" */
+ rel_offset = abs_offset - input_pos;
+ put_unaligned_le32(rel_offset, target);
+ }
+ } else {
+ if (abs_offset >= -input_pos) {
+ /* "compensating translation" */
+ rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
+ put_unaligned_le32(rel_offset, target);
+ }
+ }
+}
+
+/*
+ * Undo the 'E8' preprocessing used in LZX. Before compression, the
+ * uncompressed data was preprocessed by changing the targets of suspected x86
+ * CALL instructions from relative offsets to absolute offsets. After
+ * match/literal decoding, the decompressor must undo the translation.
+ */
+static void lzx_postprocess(u8 *data, u32 size)
+{
+ /*
+ * A worthwhile optimization is to push the end-of-buffer check into the
+ * relatively rare E8 case. This is possible if we replace the last six
+ * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
+ * before reaching end-of-buffer. In addition, this scheme guarantees
+ * that no translation can begin following an E8 byte in the last 10
+ * bytes because a 4-byte offset containing E8 as its high byte is a
+ * large negative number that is not valid for translation. That is
+ * exactly what we need.
+ */
+ u8 *tail;
+ u8 saved_bytes[6];
+ u8 *p;
+
+ if (size <= 10)
+ return;
+
+ tail = &data[size - 6];
+ memcpy(saved_bytes, tail, 6);
+ memset(tail, 0xE8, 6);
+ p = data;
+ for (;;) {
+ while (*p != 0xE8)
+ p++;
+ if (p >= tail)
+ break;
+ undo_e8_translation(p + 1, p - data);
+ p += 5;
+ }
+ memcpy(tail, saved_bytes, 6);
+}
+
+/* Read a Huffman-encoded symbol using the precode. */
+static forceinline u32 read_presym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->precode_decode_table,
+ LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the main code. */
+static forceinline u32 read_mainsym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->maincode_decode_table,
+ LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the length code. */
+static forceinline u32 read_lensym(const struct lzx_decompressor *d,
+ struct input_bitstream *is)
+{
+ return read_huffsym(is, d->lencode_decode_table,
+ LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
+}
+
+/* Read a Huffman-encoded symbol using the aligned offset code. */