summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEnzo Matsumiya <ematsumiya@suse.de>2024-08-13 20:24:25 -0300
committerEnzo Matsumiya <ematsumiya@suse.de>2024-08-15 13:35:06 -0300
commitae21257b9cc578b7e4c99033a678b67dd367084b (patch)
tree9cb6618105236edd494e2a1cb1fdf057e0d8a0e1
parent7167cd9466b4e8734c2f7510f37f8ea20c2dbd21 (diff)
downloadlinux-ae21257b9cc578b7e4c99033a678b67dd367084b.tar.gz
linux-ae21257b9cc578b7e4c99033a678b67dd367084b.tar.bz2
linux-ae21257b9cc578b7e4c99033a678b67dd367084b.zip
remove leftover
Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
-rw-r--r--fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c458
-rw-r--r--fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h20
2 files changed, 0 insertions, 478 deletions
diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c
deleted file mode 100644
index 1d949ee823dc..000000000000
--- a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c
+++ /dev/null
@@ -1,458 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2024, SUSE LLC
- *
- * Authors: Enzo Matsumiya <ematsumiya@suse.de>
- *
- * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
- */
-#include <linux/prefetch.h>
-#include <linux/slab.h>
-#include <linux/sizes.h>
-#include <linux/count_zeros.h>
-
-#include "../compress.h"
-#include "lz77.h"
-
-#ifdef CONFIG_CIFS_DEBUG2
-# define LZ77_STATS_ENABLED 1
-#else
-# define LZ77_STATS_ENABLED 0
-#endif
-
-/*
- * Compression parameters.
- *
- * - finding closer and smaller matches is faster, but also compresses worse, e.g.
- * match min len = 4, max dist = 4K compared to match min len = 8, max dist = 8K:
- * the former is ~10% faster, but yields a ~5% bigger output
- * - we don't really care about a maximum match length -- it's simply much, much faster to compute
- * a very long match length (e.g. >2M) and write only its encoded bytes than to write e.g. 500K
- * of literals
- */
-#define LZ77_MATCH_MIN_LEN 4
-#define LZ77_MATCH_MAX_LEN S32_MAX
-#define LZ77_MATCH_MIN_DIST 1
-#define LZ77_MATCH_MAX_DIST SZ_2K
-#define LZ77_HASH_LOG 15
-#define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG)
-#define LZ77_HASH_MASK (LZ77_HASH_SIZE - 1)
-
-struct lz77_flag {
- u8 *pos;
- u32 count;
- long val; /* This must be signed. */
-};
-
-#define lz77_read(nbits, _ptr) get_unaligned((const u ## nbits *)(_ptr))
-#define lz77_write(nbits, _ptr, _v) put_unaligned_le ## nbits((_v), (typeof((_v)) *)(_ptr))
-
-static __always_inline u16 lz77_read16(const void *ptr)
-{
- return lz77_read(16, ptr);
-}
-
-static __always_inline u32 lz77_read32(const void *ptr)
-{
- return lz77_read(32, ptr);
-}
-
-static __always_inline u64 lz77_read64(const void *ptr)
-{
- return lz77_read(64, ptr);
-}
-
-static __always_inline void lz77_write8(void *ptr, u8 v)
-{
- *(u8 *)ptr = v;
-}
-
-static __always_inline void lz77_write16(void *ptr, u16 v)
-{
- lz77_write(16, ptr, v);
-}
-
-static __always_inline void lz77_write32(void *ptr, u32 v)
-{
- lz77_write(32, ptr, v);
-}
-
-static __always_inline u16 lz77_hash_bytes16(const u8 *ptr)
-{
- /* A "hash function" -- simple, fast, and enough for our use-case. */
- return ((lz77_read32(ptr) * 2654435769LL) >> 16) & LZ77_HASH_MASK;
-}
-
-static __always_inline u32 lz77_hash_bytes32(const u8 *ptr)
-{
- /* A "hash function" -- simple, fast, and enough for our use-case. */
- return ((lz77_read32(ptr) * 2654435769LL) >> (32 - LZ77_HASH_LOG)) & LZ77_HASH_MASK;
-}
-
-static __always_inline u64 lz77_hash_bytes64(const u8 *ptr)
-{
- /* A "hash function" -- simple, fast, and enough for our use-case. */
- return ((lz77_read64(ptr) << 24) * 889523592379ULL) >> (64 - LZ77_HASH_LOG);
-}
-
-#define lz77_hash_bytes lz77_hash_bytes64
-
-/*
- * Count common bytes in @diff, where @diff is:
- * (*(u64 *)bufA ^ *(u64)bufB)
- */
-static __always_inline u32 lz77_count_common_bytes(const u64 diff)
-{
- return count_trailing_zeros(diff) >> 3;
-}
-
-static __always_inline u32 lz77_match_len(const u8 *match, const u8 *cur, const u8 *end)
-{
- const u8 *start = cur;
- u32 step = sizeof(u32);
- u64 diff;
-
- if (cur > end - step)
- return 0;
-
- if (lz77_read32(cur) ^ lz77_read32(match))
- return 0;
-
- cur += step;
- match += step;
- step = sizeof(u64);
-
- while (likely(cur < end - (step - 1))) {
- diff = lz77_read64(cur) ^ lz77_read64(match);
- if (!diff) {
- cur += step;
- match += step;
-
- continue;
- }
-
- cur += lz77_count_common_bytes(diff);
-
- return (u32)(cur - start);
- }
-
- step = sizeof(u32);
-
- if (cur < end - (step - 1) && !(lz77_read32(cur) ^ lz77_read32(match))) {
- cur += step;
- match += step;
- }
-
- step = sizeof(u16);
-
- if (cur < end - (step - 1) && lz77_read16(cur) == lz77_read16(match)) {
- cur += step;
- match += step;
- }
-
- if (cur < end && *cur == *match)
- cur++;
-
- return (u32)(cur - start);
-}
-
-static __always_inline u8 *lz77_write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
-{
- len -= 3;
- dist--;
- dist <<= 3;
-
- if (len < 7) {
- lz77_write16(dst, dist + len);
-
- return dst + 2;
- }
-
- dist |= 7;
- lz77_write16(dst, dist);
- dst += 2;
- len -= 7;
-
- if (!*nib) {
- lz77_write8(dst, umin(len, 15));
- *nib = dst;
- dst++;
- } else {
- **nib |= umin(len, 15) << 4;
- *nib = NULL;
- }
-
- if (len < 15)
- return dst;
-
- len -= 15;
- if (len < 255) {
- lz77_write8(dst, len);
-
- return dst + 1;
- }
-
- lz77_write8(dst, 0xff);
- dst++;
- len += 7 + 15;
-
- if (len <= 0xffff) {
- lz77_write16(dst, len);
-
- return dst + 2;
- }
-
- lz77_write16(dst, 0);
- dst += 2;
- lz77_write32(dst, len);
-
- return dst + 4;
-}
-
-static __always_inline void lz77_copy(u8 *dst, const u8 *src, size_t count)
-{
- while (count > 8) {
- *(u64 *)dst = *(u64 *)src;
- count -= 8;
- src += 8;
- dst += 8;
- }
-
- while (count > 4) {
- *(u32 *)dst = *(u32 *)src;
- count -= 4;
- src += 4;
- dst += 4;
- }
-
- if (count)
- memcpy(dst, src, count);
-}
-
-/*
- * Writing literals is the most expensive operation in the algorithm. Every and any perf
- * improvements here are welcome.
- *
- * Note: "O3" will actually slow things down for some kinds of input, whereas "O2" improves in all
- * (tested) scenarios.
- */
-#pragma GCC push_options
-#pragma GCC optimize ("O2")
-static __maybe_unused __always_inline u8 *lz77_write_literals(u8 *dst, const u8 *src, u32 count, struct lz77_flag *flags)
-{
- u32 c = umin(count, 32 - flags->count);
-
- /*
- * Writes split in 2 parts -- fast path, where we don't need to check the flag count on
- * every iteration, and the slow path, for fast path leftovers or when writing < 32
- * literals.
- */
- while (c == 32) {
- lz77_copy(dst, src, c);
-
- dst += c;
- src += c;
- count -= c;
-
- flags->val <<= c;
- lz77_write32(flags->pos, flags->val);
- flags->count = 0;
- flags->pos = dst;
- dst += 4;
-
- c = umin(count, 32);
- }
-
- while (count) {
- lz77_copy(dst, src, c);
-
- dst += c;
- src += c;
- count -= c;
-
- flags->val <<= c;
- flags->count += c;
- if (flags->count == 32) {
- lz77_write32(flags->pos, flags->val);
- flags->count = 0;
- flags->pos = dst;
- dst += 4;
- }
-
- c = umin(count, 32);
- }
-
- return dst;
-}
-#pragma GCC pop_options
-
-noinline int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen)
-{
- const u8 *srcp, *end, *anchor;
- //struct lz77_flag flags = { 0 };
- long f = 0;
- unsigned long _s, _e;
- u8 *dstp, *nib, *fp;
- u64 *htable;
- u32 fc = 0;
- int ret;
-
- if (!src || !dst)
- return -EINVAL;
-
- srcp = src;
- anchor = srcp;
- end = src + slen;
-
- dstp = dst;
- nib = NULL;
-
- /* Output buffer start with a flag. */
- //flags.pos = dstp;
- fp = dstp;
- dstp += 4;
-
- htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL);
- if (!htable)
- return -ENOMEM;
-
- if (lz77_read32(srcp) ^ lz77_read32(srcp + LZ77_MATCH_MIN_LEN)) {
- u32 hash;
-
- lz77_copy(dstp, srcp, LZ77_MATCH_MIN_LEN);
- dstp += LZ77_MATCH_MIN_LEN;
- fc += LZ77_MATCH_MIN_LEN;
-
-#define htable_add(_p, _i) \
-({ \
- hash = lz77_hash_bytes(_p); \
- htable[hash] = (_i); \
-})
- htable_add(srcp++, 0);
- htable_add(srcp++, 1);
- htable_add(srcp++, 2);
- htable_add(srcp++, 3);
- }
-
- _s = jiffies;
-find_matches:
- while (likely(srcp + 32 + LZ77_MATCH_MIN_LEN < end)) {
- u32 offset, dist, len = 0;
- u64 hash;
- //while (likely(srcp + LZ77_MATCH_MIN_LEN < end)) {
- //while (likely(len == 0) || fc < 32) {
- for (; fc < 32 && len == 0; fc++, srcp++) {
- offset = srcp - src;
-
- hash = lz77_hash_bytes(srcp);
- dist = offset - htable[hash];
-
- if (dist >= LZ77_MATCH_MIN_DIST && dist < LZ77_MATCH_MAX_DIST)
- len = lz77_match_len(src + htable[hash], srcp, end);
-
- htable[hash] = offset;
-
- //if (len >= LZ77_MATCH_MIN_LEN)
- // break;
-
- //dist = 0;
- //len = 0;
- //srcp++;
- }
-
- if (len == 0) {
- //WARN(fc > 32, "%s: flag count (%u) > 32 (unexpected) (src pos %zu)\n", __func__, fc, srcp - src);
-
- lz77_copy(dstp, anchor, 32);
-
- dstp += 32;
- anchor = srcp;
-
- f <<= 32;
- *fp = f;
- fc -= 32;
- fp = dstp;
- dstp += 4;
-
- goto find_matches;
- }
-
- //if (unlikely(srcp + LZ77_MATCH_MIN_LEN >= end)) {
- // srcp = anchor;
- // break;
- //}
-
- //dstp = lz77_write_literals(dstp, anchor, srcp - anchor, &flags);
- dstp = lz77_write_match(dstp, &nib, dist, len);
- srcp += len;
- anchor = srcp;
-
- f = (f << 1) | 1;
- fc++;
- if (fc == 32) {
- *fp = f;
- fc = 0;
- fp = dstp;
- dstp += 4;
- }
-
-#if 0
- flags.val = (flags.val << 1) | 1;
- flags.count++;
- if (flags.count == 32) {
- lz77_write32(flags.pos, flags.val);
- flags.count = 0;
- flags.pos = dstp;
- dstp += 4;
- }
-#endif
-
- htable_add(srcp, srcp++ - src);
- htable_add(srcp, srcp++ - src);
- htable_add(srcp, srcp++ - src);
- htable_add(srcp, srcp++ - src);
- }
-
- //if (srcp < end) {
- //dstp = lz77_write_literals(dstp, srcp, end - srcp, &flags);
- while (srcp < end) {
- u32 c = umin(end - srcp, 32 - fc);
-
- lz77_copy(dstp, srcp, c);
-
- dstp += c;
- srcp += c;
-
- f <<= c;
- fc += c;
- if (fc == 32) {
- *fp = f;
- fc = 0;
- fp = dstp;
- dstp += 4;
- }
- }
-
- f <<= (32 - fc);
- f |= (1 << (32 - fc)) - 1;
- *fp = f;
-#if 0
- flags.val <<= (32 - flags.count);
- flags.val |= (1 << (32 - flags.count)) - 1;
- lz77_write32(flags.pos, flags.val);
-#endif
-
- _e = jiffies;
-
- *dlen = dstp - (u8 *)dst;
- pr_err("%s: took %ums to compress (slen=%u, dlen=%u)\n", __func__, jiffies_to_msecs(_e - _s), slen, *dlen);
-
- if (*dlen < slen)
- ret = 0;
- else
- ret = -EMSGSIZE;
-
- kvfree(htable);
-
- return ret;
-}
diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h
deleted file mode 100644
index 904023f0fb4d..000000000000
--- a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2024, SUSE LLC
- *
- * Authors: Enzo Matsumiya <ematsumiya@suse.de>
- *
- * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
- */
-#ifndef _SMB_COMPRESS_LZ77_H
-#define _SMB_COMPRESS_LZ77_H
-
-#include <linux/kernel.h>
-
-static __always_inline size_t lz77_compress_mem_required(size_t len)
-{
- return len + (len >> 3);
-}
-
-int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen);
-#endif /* _SMB_COMPRESS_LZ77_H */