diff options
author | Enzo Matsumiya <ematsumiya@suse.de> | 2024-08-13 20:24:25 -0300 |
---|---|---|
committer | Enzo Matsumiya <ematsumiya@suse.de> | 2024-08-15 13:35:06 -0300 |
commit | ae21257b9cc578b7e4c99033a678b67dd367084b (patch) | |
tree | 9cb6618105236edd494e2a1cb1fdf057e0d8a0e1 | |
parent | 7167cd9466b4e8734c2f7510f37f8ea20c2dbd21 (diff) | |
download | linux-ae21257b9cc578b7e4c99033a678b67dd367084b.tar.gz linux-ae21257b9cc578b7e4c99033a678b67dd367084b.tar.bz2 linux-ae21257b9cc578b7e4c99033a678b67dd367084b.zip |
remove leftover
Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
-rw-r--r-- | fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c | 458 | ||||
-rw-r--r-- | fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h | 20 |
2 files changed, 0 insertions, 478 deletions
diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c deleted file mode 100644 index 1d949ee823dc..000000000000 --- a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c +++ /dev/null @@ -1,458 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2024, SUSE LLC - * - * Authors: Enzo Matsumiya <ematsumiya@suse.de> - * - * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec. - */ -#include <linux/prefetch.h> -#include <linux/slab.h> -#include <linux/sizes.h> -#include <linux/count_zeros.h> - -#include "../compress.h" -#include "lz77.h" - -#ifdef CONFIG_CIFS_DEBUG2 -# define LZ77_STATS_ENABLED 1 -#else -# define LZ77_STATS_ENABLED 0 -#endif - -/* - * Compression parameters. - * - * - finding closer and smaller matches is faster, but also compresses worse, e.g. - * match min len = 4, max dist = 4K compared to match min len = 8, max dist = 8K: - * the former is ~10% faster, but yields a ~5% bigger output - * - we don't really care about a maximum match length -- it's simply much, much faster to compute - * a very long match length (e.g. >2M) and write only its encoded bytes than to write e.g. 500K - * of literals - */ -#define LZ77_MATCH_MIN_LEN 4 -#define LZ77_MATCH_MAX_LEN S32_MAX -#define LZ77_MATCH_MIN_DIST 1 -#define LZ77_MATCH_MAX_DIST SZ_2K -#define LZ77_HASH_LOG 15 -#define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG) -#define LZ77_HASH_MASK (LZ77_HASH_SIZE - 1) - -struct lz77_flag { - u8 *pos; - u32 count; - long val; /* This must be signed. */ -}; - -#define lz77_read(nbits, _ptr) get_unaligned((const u ## nbits *)(_ptr)) -#define lz77_write(nbits, _ptr, _v) put_unaligned_le ## nbits((_v), (typeof((_v)) *)(_ptr)) - -static __always_inline u16 lz77_read16(const void *ptr) -{ - return lz77_read(16, ptr); -} - -static __always_inline u32 lz77_read32(const void *ptr) -{ - return lz77_read(32, ptr); -} - -static __always_inline u64 lz77_read64(const void *ptr) -{ - return lz77_read(64, ptr); -} - -static __always_inline void lz77_write8(void *ptr, u8 v) -{ - *(u8 *)ptr = v; -} - -static __always_inline void lz77_write16(void *ptr, u16 v) -{ - lz77_write(16, ptr, v); -} - -static __always_inline void lz77_write32(void *ptr, u32 v) -{ - lz77_write(32, ptr, v); -} - -static __always_inline u16 lz77_hash_bytes16(const u8 *ptr) -{ - /* A "hash function" -- simple, fast, and enough for our use-case. */ - return ((lz77_read32(ptr) * 2654435769LL) >> 16) & LZ77_HASH_MASK; -} - -static __always_inline u32 lz77_hash_bytes32(const u8 *ptr) -{ - /* A "hash function" -- simple, fast, and enough for our use-case. */ - return ((lz77_read32(ptr) * 2654435769LL) >> (32 - LZ77_HASH_LOG)) & LZ77_HASH_MASK; -} - -static __always_inline u64 lz77_hash_bytes64(const u8 *ptr) -{ - /* A "hash function" -- simple, fast, and enough for our use-case. */ - return ((lz77_read64(ptr) << 24) * 889523592379ULL) >> (64 - LZ77_HASH_LOG); -} - -#define lz77_hash_bytes lz77_hash_bytes64 - -/* - * Count common bytes in @diff, where @diff is: - * (*(u64 *)bufA ^ *(u64)bufB) - */ -static __always_inline u32 lz77_count_common_bytes(const u64 diff) -{ - return count_trailing_zeros(diff) >> 3; -} - -static __always_inline u32 lz77_match_len(const u8 *match, const u8 *cur, const u8 *end) -{ - const u8 *start = cur; - u32 step = sizeof(u32); - u64 diff; - - if (cur > end - step) - return 0; - - if (lz77_read32(cur) ^ lz77_read32(match)) - return 0; - - cur += step; - match += step; - step = sizeof(u64); - - while (likely(cur < end - (step - 1))) { - diff = lz77_read64(cur) ^ lz77_read64(match); - if (!diff) { - cur += step; - match += step; - - continue; - } - - cur += lz77_count_common_bytes(diff); - - return (u32)(cur - start); - } - - step = sizeof(u32); - - if (cur < end - (step - 1) && !(lz77_read32(cur) ^ lz77_read32(match))) { - cur += step; - match += step; - } - - step = sizeof(u16); - - if (cur < end - (step - 1) && lz77_read16(cur) == lz77_read16(match)) { - cur += step; - match += step; - } - - if (cur < end && *cur == *match) - cur++; - - return (u32)(cur - start); -} - -static __always_inline u8 *lz77_write_match(u8 *dst, u8 **nib, u32 dist, u32 len) -{ - len -= 3; - dist--; - dist <<= 3; - - if (len < 7) { - lz77_write16(dst, dist + len); - - return dst + 2; - } - - dist |= 7; - lz77_write16(dst, dist); - dst += 2; - len -= 7; - - if (!*nib) { - lz77_write8(dst, umin(len, 15)); - *nib = dst; - dst++; - } else { - **nib |= umin(len, 15) << 4; - *nib = NULL; - } - - if (len < 15) - return dst; - - len -= 15; - if (len < 255) { - lz77_write8(dst, len); - - return dst + 1; - } - - lz77_write8(dst, 0xff); - dst++; - len += 7 + 15; - - if (len <= 0xffff) { - lz77_write16(dst, len); - - return dst + 2; - } - - lz77_write16(dst, 0); - dst += 2; - lz77_write32(dst, len); - - return dst + 4; -} - -static __always_inline void lz77_copy(u8 *dst, const u8 *src, size_t count) -{ - while (count > 8) { - *(u64 *)dst = *(u64 *)src; - count -= 8; - src += 8; - dst += 8; - } - - while (count > 4) { - *(u32 *)dst = *(u32 *)src; - count -= 4; - src += 4; - dst += 4; - } - - if (count) - memcpy(dst, src, count); -} - -/* - * Writing literals is the most expensive operation in the algorithm. Every and any perf - * improvements here are welcome. - * - * Note: "O3" will actually slow things down for some kinds of input, whereas "O2" improves in all - * (tested) scenarios. - */ -#pragma GCC push_options -#pragma GCC optimize ("O2") -static __maybe_unused __always_inline u8 *lz77_write_literals(u8 *dst, const u8 *src, u32 count, struct lz77_flag *flags) -{ - u32 c = umin(count, 32 - flags->count); - - /* - * Writes split in 2 parts -- fast path, where we don't need to check the flag count on - * every iteration, and the slow path, for fast path leftovers or when writing < 32 - * literals. - */ - while (c == 32) { - lz77_copy(dst, src, c); - - dst += c; - src += c; - count -= c; - - flags->val <<= c; - lz77_write32(flags->pos, flags->val); - flags->count = 0; - flags->pos = dst; - dst += 4; - - c = umin(count, 32); - } - - while (count) { - lz77_copy(dst, src, c); - - dst += c; - src += c; - count -= c; - - flags->val <<= c; - flags->count += c; - if (flags->count == 32) { - lz77_write32(flags->pos, flags->val); - flags->count = 0; - flags->pos = dst; - dst += 4; - } - - c = umin(count, 32); - } - - return dst; -} -#pragma GCC pop_options - -noinline int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen) -{ - const u8 *srcp, *end, *anchor; - //struct lz77_flag flags = { 0 }; - long f = 0; - unsigned long _s, _e; - u8 *dstp, *nib, *fp; - u64 *htable; - u32 fc = 0; - int ret; - - if (!src || !dst) - return -EINVAL; - - srcp = src; - anchor = srcp; - end = src + slen; - - dstp = dst; - nib = NULL; - - /* Output buffer start with a flag. */ - //flags.pos = dstp; - fp = dstp; - dstp += 4; - - htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL); - if (!htable) - return -ENOMEM; - - if (lz77_read32(srcp) ^ lz77_read32(srcp + LZ77_MATCH_MIN_LEN)) { - u32 hash; - - lz77_copy(dstp, srcp, LZ77_MATCH_MIN_LEN); - dstp += LZ77_MATCH_MIN_LEN; - fc += LZ77_MATCH_MIN_LEN; - -#define htable_add(_p, _i) \ -({ \ - hash = lz77_hash_bytes(_p); \ - htable[hash] = (_i); \ -}) - htable_add(srcp++, 0); - htable_add(srcp++, 1); - htable_add(srcp++, 2); - htable_add(srcp++, 3); - } - - _s = jiffies; -find_matches: - while (likely(srcp + 32 + LZ77_MATCH_MIN_LEN < end)) { - u32 offset, dist, len = 0; - u64 hash; - //while (likely(srcp + LZ77_MATCH_MIN_LEN < end)) { - //while (likely(len == 0) || fc < 32) { - for (; fc < 32 && len == 0; fc++, srcp++) { - offset = srcp - src; - - hash = lz77_hash_bytes(srcp); - dist = offset - htable[hash]; - - if (dist >= LZ77_MATCH_MIN_DIST && dist < LZ77_MATCH_MAX_DIST) - len = lz77_match_len(src + htable[hash], srcp, end); - - htable[hash] = offset; - - //if (len >= LZ77_MATCH_MIN_LEN) - // break; - - //dist = 0; - //len = 0; - //srcp++; - } - - if (len == 0) { - //WARN(fc > 32, "%s: flag count (%u) > 32 (unexpected) (src pos %zu)\n", __func__, fc, srcp - src); - - lz77_copy(dstp, anchor, 32); - - dstp += 32; - anchor = srcp; - - f <<= 32; - *fp = f; - fc -= 32; - fp = dstp; - dstp += 4; - - goto find_matches; - } - - //if (unlikely(srcp + LZ77_MATCH_MIN_LEN >= end)) { - // srcp = anchor; - // break; - //} - - //dstp = lz77_write_literals(dstp, anchor, srcp - anchor, &flags); - dstp = lz77_write_match(dstp, &nib, dist, len); - srcp += len; - anchor = srcp; - - f = (f << 1) | 1; - fc++; - if (fc == 32) { - *fp = f; - fc = 0; - fp = dstp; - dstp += 4; - } - -#if 0 - flags.val = (flags.val << 1) | 1; - flags.count++; - if (flags.count == 32) { - lz77_write32(flags.pos, flags.val); - flags.count = 0; - flags.pos = dstp; - dstp += 4; - } -#endif - - htable_add(srcp, srcp++ - src); - htable_add(srcp, srcp++ - src); - htable_add(srcp, srcp++ - src); - htable_add(srcp, srcp++ - src); - } - - //if (srcp < end) { - //dstp = lz77_write_literals(dstp, srcp, end - srcp, &flags); - while (srcp < end) { - u32 c = umin(end - srcp, 32 - fc); - - lz77_copy(dstp, srcp, c); - - dstp += c; - srcp += c; - - f <<= c; - fc += c; - if (fc == 32) { - *fp = f; - fc = 0; - fp = dstp; - dstp += 4; - } - } - - f <<= (32 - fc); - f |= (1 << (32 - fc)) - 1; - *fp = f; -#if 0 - flags.val <<= (32 - flags.count); - flags.val |= (1 << (32 - flags.count)) - 1; - lz77_write32(flags.pos, flags.val); -#endif - - _e = jiffies; - - *dlen = dstp - (u8 *)dst; - pr_err("%s: took %ums to compress (slen=%u, dlen=%u)\n", __func__, jiffies_to_msecs(_e - _s), slen, *dlen); - - if (*dlen < slen) - ret = 0; - else - ret = -EMSGSIZE; - - kvfree(htable); - - return ret; -} diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h deleted file mode 100644 index 904023f0fb4d..000000000000 --- a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2024, SUSE LLC - * - * Authors: Enzo Matsumiya <ematsumiya@suse.de> - * - * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec. - */ -#ifndef _SMB_COMPRESS_LZ77_H -#define _SMB_COMPRESS_LZ77_H - -#include <linux/kernel.h> - -static __always_inline size_t lz77_compress_mem_required(size_t len) -{ - return len + (len >> 3); -} - -int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen); -#endif /* _SMB_COMPRESS_LZ77_H */ |