author		Enzo Matsumiya <ematsumiya@suse.de>	2024-08-13 20:22:50 -0300
committer	Enzo Matsumiya <ematsumiya@suse.de>	2024-08-15 13:35:06 -0300
commit		7167cd9466b4e8734c2f7510f37f8ea20c2dbd21 (patch)
tree		9e842b283f2fa2264f1ce474cae8ce0bc2f90eb2
parent		50f11f6ac98b5270a078bedd2685d3941dc6135b (diff)
smb: client: compression WIP

works

Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
-rw-r--r--	fs/smb/client/compress.c	2
-rw-r--r--	fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c	458
-rw-r--r--	fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h	20
-rw-r--r--	fs/smb/client/compress/lz77.c	275
-rw-r--r--	fs/smb/client/compress/lz77.h	273
-rw-r--r--	fs/smb/client/transport.c	13
6 files changed, 688 insertions(+), 353 deletions(-)
diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c
index 645edde6e328..c313e9e6504c 100644
--- a/fs/smb/client/compress.c
+++ b/fs/smb/client/compress.c
@@ -322,7 +322,7 @@ bool should_compress(const struct cifs_tcon *tcon, const struct smb_rqst *rq)
int smb_compress(struct TCP_Server_Info *server, struct smb_rqst *rq, compress_send_fn send_fn)
{
struct iov_iter iter;
- size_t slen, dlen;
+ u32 slen, dlen;
void *src, *dst;
int ret;
diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c
new file mode 100644
index 000000000000..1d949ee823dc
--- /dev/null
+++ b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024, SUSE LLC
+ *
+ * Authors: Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
+ */
+#include <linux/prefetch.h>
+#include <linux/slab.h>
+#include <linux/sizes.h>
+#include <linux/count_zeros.h>
+
+#include "../compress.h"
+#include "lz77.h"
+
+#ifdef CONFIG_CIFS_DEBUG2
+# define LZ77_STATS_ENABLED 1
+#else
+# define LZ77_STATS_ENABLED 0
+#endif
+
+/*
+ * Compression parameters.
+ *
+ * - finding closer and smaller matches is faster, but also compresses worse, e.g.
+ * match min len = 4, max dist = 4K compared to match min len = 8, max dist = 8K:
+ * the former is ~10% faster, but yields a ~5% bigger output
+ * - we don't really care about a maximum match length -- it's simply much, much faster to compute
+ * a very long match length (e.g. >2M) and write only its encoded bytes than to write e.g. 500K
+ * of literals
+ */
+#define LZ77_MATCH_MIN_LEN 4
+#define LZ77_MATCH_MAX_LEN S32_MAX
+#define LZ77_MATCH_MIN_DIST 1
+#define LZ77_MATCH_MAX_DIST SZ_2K
+#define LZ77_HASH_LOG 15
+#define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG)
+#define LZ77_HASH_MASK (LZ77_HASH_SIZE - 1)
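+
+/*
+ * e.g. LZ77_HASH_LOG = 15 gives a 32768-entry table; with the u64 entries
+ * allocated in lz77_compress(), that is 256 KiB of scratch space per call.
+ */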
+
+struct lz77_flag {
+ u8 *pos;
+ u32 count;
+ long val; /* This must be signed. */
+};
+
+#define lz77_read(nbits, _ptr) get_unaligned((const u ## nbits *)(_ptr))
+#define lz77_write(nbits, _ptr, _v) put_unaligned_le ## nbits((_v), (typeof((_v)) *)(_ptr))
+
+static __always_inline u16 lz77_read16(const void *ptr)
+{
+ return lz77_read(16, ptr);
+}
+
+static __always_inline u32 lz77_read32(const void *ptr)
+{
+ return lz77_read(32, ptr);
+}
+
+static __always_inline u64 lz77_read64(const void *ptr)
+{
+ return lz77_read(64, ptr);
+}
+
+static __always_inline void lz77_write8(void *ptr, u8 v)
+{
+ *(u8 *)ptr = v;
+}
+
+static __always_inline void lz77_write16(void *ptr, u16 v)
+{
+ lz77_write(16, ptr, v);
+}
+
+static __always_inline void lz77_write32(void *ptr, u32 v)
+{
+ lz77_write(32, ptr, v);
+}
+
+static __always_inline u16 lz77_hash_bytes16(const u8 *ptr)
+{
+ /* A "hash function" -- simple, fast, and enough for our use-case. */
+ return ((lz77_read32(ptr) * 2654435769LL) >> 16) & LZ77_HASH_MASK;
+}
+
+static __always_inline u32 lz77_hash_bytes32(const u8 *ptr)
+{
+ /* A "hash function" -- simple, fast, and enough for our use-case. */
+ return ((lz77_read32(ptr) * 2654435769LL) >> (32 - LZ77_HASH_LOG)) & LZ77_HASH_MASK;
+}
+
+static __always_inline u64 lz77_hash_bytes64(const u8 *ptr)
+{
+ /* A "hash function" -- simple, fast, and enough for our use-case. */
+ return ((lz77_read64(ptr) << 24) * 889523592379ULL) >> (64 - LZ77_HASH_LOG);
+}
+
+#define lz77_hash_bytes lz77_hash_bytes64
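+
+/*
+ * The multipliers above are the usual multiplicative-hash constants:
+ * 2654435769 is the 32-bit Knuth/Fibonacci constant (~2^32 / golden ratio)
+ * and 889523592379 is the 5-byte prime familiar from LZ4/zstd, so the top
+ * LZ77_HASH_LOG bits of the product are well mixed.
+ */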
+
+/*
+ * Count common bytes in @diff, where @diff is:
+ * (*(u64 *)bufA ^ *(u64 *)bufB)
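+ *
+ * e.g. (little-endian) bufA = "abcdWXYZ", bufB = "abcdQRST": the low 32 bits
+ * of @diff are zero and its lowest set bit is bit 33, so
+ * count_trailing_zeros() returns 33 and 33 >> 3 = 4 common bytes.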
+ */
+static __always_inline u32 lz77_count_common_bytes(const u64 diff)
+{
+ return count_trailing_zeros(diff) >> 3;
+}
+
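+/*
+ * Return how many bytes at @cur match those at @match, never reading past
+ * @end: a u32 probe rejects candidates shorter than LZ77_MATCH_MIN_LEN, u64
+ * strides cover the bulk, and u32/u16/u8 checks finish the tail.
+ */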
+static __always_inline u32 lz77_match_len(const u8 *match, const u8 *cur, const u8 *end)
+{
+ const u8 *start = cur;
+ u32 step = sizeof(u32);
+ u64 diff;
+
+ if (cur > end - step)
+ return 0;
+
+ if (lz77_read32(cur) ^ lz77_read32(match))
+ return 0;
+
+ cur += step;
+ match += step;
+ step = sizeof(u64);
+
+ while (likely(cur < end - (step - 1))) {
+ diff = lz77_read64(cur) ^ lz77_read64(match);
+ if (!diff) {
+ cur += step;
+ match += step;
+
+ continue;
+ }
+
+ cur += lz77_count_common_bytes(diff);
+
+ return (u32)(cur - start);
+ }
+
+ step = sizeof(u32);
+
+ if (cur < end - (step - 1) && !(lz77_read32(cur) ^ lz77_read32(match))) {
+ cur += step;
+ match += step;
+ }
+
+ step = sizeof(u16);
+
+ if (cur < end - (step - 1) && lz77_read16(cur) == lz77_read16(match)) {
+ cur += step;
+ match += step;
+ }
+
+ if (cur < end && *cur == *match)
+ cur++;
+
+ return (u32)(cur - start);
+}
+
+static __always_inline u8 *lz77_write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
+{
+ len -= 3;
+ dist--;
+ dist <<= 3;
+
+ if (len < 7) {
+ lz77_write16(dst, dist + len);
+
+ return dst + 2;
+ }
+
+ dist |= 7;
+ lz77_write16(dst, dist);
+ dst += 2;
+ len -= 7;
+
+ if (!*nib) {
+ lz77_write8(dst, umin(len, 15));
+ *nib = dst;
+ dst++;
+ } else {
+ **nib |= umin(len, 15) << 4;
+ *nib = NULL;
+ }
+
+ if (len < 15)
+ return dst;
+
+ len -= 15;
+ if (len < 255) {
+ lz77_write8(dst, len);
+
+ return dst + 1;
+ }
+
+ lz77_write8(dst, 0xff);
+ dst++;
+ len += 7 + 15;
+
+ if (len <= 0xffff) {
+ lz77_write16(dst, len);
+
+ return dst + 2;
+ }
+
+ lz77_write16(dst, 0);
+ dst += 2;
+ lz77_write32(dst, len);
+
+ return dst + 4;
+}
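+
+/*
+ * Worked example of the encoding above: dist = 10, len = 5 fits the short
+ * form -- a single 16-bit token ((10 - 1) << 3) | (5 - 3) = 0x004a.  Longer
+ * matches saturate the low 3 bits at 7 and spill the remainder into a
+ * nibble shared by two consecutive matches, then an extra byte, then a u16,
+ * then a u32 -- the MS-XCA length escape chain.
+ */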
+
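+/*
+ * Word-at-a-time copy.  The plain u64/u32 stores assume the arch tolerates
+ * unaligned accesses; put_unaligned() would be the portable spelling.
+ */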
+static __always_inline void lz77_copy(u8 *dst, const u8 *src, size_t count)
+{
+ while (count > 8) {
+ *(u64 *)dst = *(u64 *)src;
+ count -= 8;
+ src += 8;
+ dst += 8;
+ }
+
+ while (count > 4) {
+ *(u32 *)dst = *(u32 *)src;
+ count -= 4;
+ src += 4;
+ dst += 4;
+ }
+
+ if (count)
+ memcpy(dst, src, count);
+}
+
+/*
+ * Writing literals is the most expensive operation in the algorithm. Any and all perf
+ * improvements here are welcome.
+ *
+ * Note: "O3" will actually slow things down for some kinds of input, whereas "O2" improves in all
+ * (tested) scenarios.
+ */
+#pragma GCC push_options
+#pragma GCC optimize ("O2")
+static __maybe_unused __always_inline u8 *lz77_write_literals(u8 *dst, const u8 *src, u32 count, struct lz77_flag *flags)
+{
+ u32 c = umin(count, 32 - flags->count);
+
+ /*
+ * Writes are split in two parts -- the fast path, where we don't need to check the flag count on
+ * every iteration, and the slow path, for fast path leftovers or when writing < 32
+ * literals.
+ */
+ while (c == 32) {
+ lz77_copy(dst, src, c);
+
+ dst += c;
+ src += c;
+ count -= c;
+
+ flags->val <<= c;
+ lz77_write32(flags->pos, flags->val);
+ flags->count = 0;
+ flags->pos = dst;
+ dst += 4;
+
+ c = umin(count, 32);
+ }
+
+ while (count) {
+ lz77_copy(dst, src, c);
+
+ dst += c;
+ src += c;
+ count -= c;
+
+ flags->val <<= c;
+ flags->count += c;
+ if (flags->count == 32) {
+ lz77_write32(flags->pos, flags->val);
+ flags->count = 0;
+ flags->pos = dst;
+ dst += 4;
+ }
+
+ c = umin(count, 32);
+ }
+
+ return dst;
+}
+#pragma GCC pop_options
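+
+/*
+ * The flags words above form the MS-XCA element bitmap, consumed MSB-first:
+ * a 0 bit marks a literal byte, a 1 bit an encoded match.  A 4-byte slot is
+ * reserved in the output and backfilled once 32 bits have accumulated.
+ */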
+
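+/*
+ * Greedy single-pass LZ77: each position is hashed into @htable, the
+ * previous position with the same hash is probed as a match candidate, and
+ * literals are emitted until a match of at least LZ77_MATCH_MIN_LEN bytes
+ * within LZ77_MATCH_MAX_DIST is found, which is then written in encoded
+ * form and skipped over.
+ */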
+noinline int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen)
+{
+ const u8 *srcp, *end, *anchor;
+ //struct lz77_flag flags = { 0 };
+ long f = 0;
+ unsigned long _s, _e;
+ u8 *dstp, *nib, *fp;
+ u64 *htable;
+ u32 fc = 0;
+ int ret;
+
+ if (!src || !dst)
+ return -EINVAL;
+
+ srcp = src;
+ anchor = srcp;
+ end = src + slen;
+
+ dstp = dst;
+ nib = NULL;
+
+ /* The output buffer starts with a 4-byte flags word. */
+ //flags.pos = dstp;
+ fp = dstp;
+ dstp += 4;
+
+ htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL);
+ if (!htable)
+ return -ENOMEM;
+
+ if (lz77_read32(srcp) ^ lz77_read32(srcp + LZ77_MATCH_MIN_LEN)) {
+ u32 hash;
+
+ lz77_copy(dstp, srcp, LZ77_MATCH_MIN_LEN);
+ dstp += LZ77_MATCH_MIN_LEN;
+ fc += LZ77_MATCH_MIN_LEN;
+
+#define htable_add(_p, _i) \
+({ \
+ hash = lz77_hash_bytes(_p); \
+ htable[hash] = (_i); \
+})
+ htable_add(srcp++, 0);
+ htable_add(srcp++, 1);
+ htable_add(srcp++, 2);
+ htable_add(srcp++, 3);
+ }
+
+ _s = jiffies;
+find_matches:
+ while (likely(srcp + 32 + LZ77_MATCH_MIN_LEN < end)) {
+ u32 offset, dist, len = 0;
+ u64 hash;
+ //while (likely(srcp + LZ77_MATCH_MIN_LEN < end)) {
+ //while (likely(len == 0) || fc < 32) {
+ for (; fc < 32 && len == 0; fc++, srcp++) {
+ offset = srcp - src;
+
+ hash = lz77_hash_bytes(srcp);
+ dist = offset - htable[hash];
+
+ if (dist >= LZ77_MATCH_MIN_DIST && dist < LZ77_MATCH_MAX_DIST)
+ len = lz77_match_len(src + htable[hash], srcp, end);
+
+ htable[hash] = offset;
+
+ //if (len >= LZ77_MATCH_MIN_LEN)
+ // break;
+
+ //dist = 0;
+ //len = 0;
+ //srcp++;
+ }
+
+ if (len == 0) {
+ //WARN(fc > 32, "%s: flag count (%u) > 32 (unexpected) (src pos %zu)\n", __func__, fc, srcp - src);
+
+ lz77_copy(dstp, anchor, 32);
+
+ dstp += 32;
+ anchor = srcp;
+
+ f <<= 32;
+ lz77_write32(fp, f);
+ fc -= 32;
+ fp = dstp;
+ dstp += 4;
+
+ goto find_matches;
+ }
+
+ //if (unlikely(srcp + LZ77_MATCH_MIN_LEN >= end)) {
+ // srcp = anchor;
+ // break;
+ //}
+
+ //dstp = lz77_write_literals(dstp, anchor, srcp - anchor, &flags);
+ dstp = lz77_write_match(dstp, &nib, dist, len);
+ srcp += len;
+ anchor = srcp;
+
+ f = (f << 1) | 1;
+ fc++;
+ if (fc == 32) {
+ lz77_write32(fp, f);
+ fc = 0;
+ fp = dstp;
+ dstp += 4;
+ }
+
+#if 0
+ flags.val = (flags.val << 1) | 1;
+ flags.count++;
+ if (flags.count == 32) {
+ lz77_write32(flags.pos, flags.val);
+ flags.count = 0;
+ flags.pos = dstp;
+ dstp += 4;
+ }
+#endif
+
+ htable_add(srcp, srcp++ - src);
+ htable_add(srcp, srcp++ - src);
+ htable_add(srcp, srcp++ - src);
+ htable_add(srcp, srcp++ - src);
+ }
+
+ //if (srcp < end) {
+ //dstp = lz77_write_literals(dstp, srcp, end - srcp, &flags);
+ while (srcp < end) {
+ u32 c = umin(end - srcp, 32 - fc);
+
+ lz77_copy(dstp, srcp, c);
+
+ dstp += c;
+ srcp += c;
+
+ f <<= c;
+ fc += c;
+ if (fc == 32) {
+ lz77_write32(fp, f);
+ fc = 0;
+ fp = dstp;
+ dstp += 4;
+ }
+ }
+
+ f <<= (32 - fc);
+ f |= (1ULL << (32 - fc)) - 1;
+ lz77_write32(fp, f);
+#if 0
+ flags.val <<= (32 - flags.count);
+ flags.val |= (1 << (32 - flags.count)) - 1;
+ lz77_write32(flags.pos, flags.val);
+#endif
+
+ _e = jiffies;
+
+ *dlen = dstp - (u8 *)dst;
+ pr_err("%s: took %ums to compress (slen=%u, dlen=%u)\n", __func__, jiffies_to_msecs(_e - _s), slen, *dlen);
+
+ if (*dlen < slen)
+ ret = 0;
+ else
+ ret = -EMSGSIZE;
+
+ kvfree(htable);
+
+ return ret;
+}
diff --git a/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h
new file mode 100644
index 000000000000..904023f0fb4d
--- /dev/null
+++ b/fs/smb/client/compress/lz77-fastest-2024-08-13/lz77.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2024, SUSE LLC
+ *
+ * Authors: Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
+ */
+#ifndef _SMB_COMPRESS_LZ77_H
+#define _SMB_COMPRESS_LZ77_H
+
+#include <linux/kernel.h>
+
+static __always_inline size_t lz77_compress_mem_required(size_t len)
+{
+ return len + (len >> 3);
+}
+
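+/*
+ * Illustrative calling sketch (the in-tree caller is smb_compress() in
+ * compress.c):
+ *
+ *	u32 dlen = 0;
+ *	u8 *dst = kvzalloc(lz77_compress_mem_required(slen), GFP_KERNEL);
+ *	int ret = dst ? lz77_compress(src, slen, dst, &dlen) : -ENOMEM;
+ *
+ * On success dst[0..dlen) holds the compressed stream; -EMSGSIZE means the
+ * output would not have been smaller than the input.
+ */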
+int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen);
+#endif /* _SMB_COMPRESS_LZ77_H */
diff --git a/fs/smb/client/compress/lz77.c b/fs/smb/client/compress/lz77.c
index 2b8d548f9492..2cbfcf3ed9bf 100644
--- a/fs/smb/client/compress/lz77.c
+++ b/fs/smb/client/compress/lz77.c
@@ -6,15 +6,142 @@
*
* Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
*/
+#include <linux/prefetch.h>
#include <linux/slab.h>
+#include <linux/sizes.h>
+#include <linux/count_zeros.h>
+
+#include "../compress.h"
#include "lz77.h"
-static __always_inline u32 hash3(const u8 *ptr)
+#ifdef CONFIG_CIFS_DEBUG2
+# define LZ77_STATS_ENABLED 1
+#else
+# define LZ77_STATS_ENABLED 0
+#endif
+
+/*
+ * Compression parameters.
+ *
+ * - finding closer and smaller matches is faster, but also compresses worse, e.g.
+ * match min len = 4, max dist = 4K compared to match min len = 8, max dist = 8K:
+ * the former is ~10% faster, but yields a ~5% bigger output
+ * - we don't really care about a maximum match length -- it's simply much, much faster to compute
+ * a very long match length (e.g. >2M) and write only its encoded bytes than to write e.g. 500K
+ * of literals
+ */
+#define LZ77_MATCH_MIN_LEN 4
+#define LZ77_MATCH_MAX_LEN S32_MAX
+#define LZ77_MATCH_MIN_DIST 1
+#define LZ77_MATCH_MAX_DIST SZ_2K
+#define LZ77_HASH_LOG 15
+#define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG)
+
+#define lz77_read(nbits, _ptr) get_unaligned((const u ## nbits *)(_ptr))
+#define lz77_write(nbits, _ptr, _v) put_unaligned_le ## nbits((_v), (typeof((_v)) *)(_ptr))
+
+struct lz77_flag {
+ u8 *pos;
+ u32 count;
+ long val;
+};
+
+static __always_inline u16 lz77_read16(const void *ptr)
+{
+ return lz77_read(16, ptr);
+}
+
+static __always_inline u32 lz77_read32(const void *ptr)
{
- return lz77_hash32(lz77_read32(ptr) & 0xffffff, LZ77_HASH_LOG);
+ return lz77_read(32, ptr);
+}
+
+static __always_inline u64 lz77_read64(const void *ptr)
+{
+ return lz77_read(64, ptr);
+}
+
+static __always_inline void lz77_write8(void *ptr, u8 v)
+{
+ *(u8 *)ptr = v;
+}
+
+static __always_inline void lz77_write16(void *ptr, u16 v)
+{
+ lz77_write(16, ptr, v);
+}
+
+static __always_inline void lz77_write32(void *ptr, u32 v)
+{
+ lz77_write(32, ptr, v);
+}
+
+static __always_inline u64 lz77_hash_bytes(const u8 *ptr)
+{
+ /* A "hash function" -- simple, fast, and enough for our use-case. */
+ return ((lz77_read64(ptr) << 24) * 889523592379ULL) >> (64 - LZ77_HASH_LOG);
+}
+
+/*
+ * Count common bytes in @diff, where @diff is:
+ * (*(u64 *)bufA ^ *(u64 *)bufB)
+ */
+static __always_inline u32 lz77_count_common_bytes(const u64 diff)
+{
+ return count_trailing_zeros(diff) >> 3;
+}
+
+static __always_inline u32 lz77_match_len(const u8 *match, const u8 *cur, const u8 *end)
+{
+ const u8 *start = cur;
+ u32 step = sizeof(u32);
+ u64 diff;
+
+ if (cur > end - step)
+ return 0;
+
+ if (lz77_read32(cur) ^ lz77_read32(match))
+ return 0;
+
+ cur += step;
+ match += step;
+ step = sizeof(u64);
+
+ while (likely(cur < end - (step - 1))) {
+ diff = lz77_read64(cur) ^ lz77_read64(match);
+ if (!diff) {
+ cur += step;
+ match += step;
+
+ continue;
+ }
+
+ cur += lz77_count_common_bytes(diff);
+
+ return (u32)(cur - start);
+ }
+
+ step = sizeof(u32);
+
+ if (cur < end - (step - 1) && !(lz77_read32(cur) ^ lz77_read32(match))) {
+ cur += step;
+ match += step;
+ }
+
+ step = sizeof(u16);
+
+ if (cur < end - (step - 1) && lz77_read16(cur) == lz77_read16(match)) {
+ cur += step;
+ match += step;
+ }
+
+ if (cur < end && *cur == *match)
+ cur++;
+
+ return (u32)(cur - start);
}
-static u8 *write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
+static __always_inline u8 *lz77_write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
{
len -= 3;
dist--;
@@ -22,6 +149,7 @@ static u8 *write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
if (len < 7) {
lz77_write16(dst, dist + len);
+
return dst + 2;
}
@@ -31,11 +159,11 @@ static u8 *write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
len -= 7;
if (!*nib) {
+ lz77_write8(dst, umin(len, 15));
*nib = dst;
- lz77_write8(dst, min_t(unsigned int, len, 15));
dst++;
} else {
- **nib |= min_t(unsigned int, len, 15) << 4;
+ **nib |= umin(len, 15) << 4;
*nib = NULL;
}
@@ -45,15 +173,17 @@ static u8 *write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
len -= 15;
if (len < 255) {
lz77_write8(dst, len);
+
return dst + 1;
}
lz77_write8(dst, 0xff);
dst++;
-
len += 7 + 15;
+
if (len <= 0xffff) {
lz77_write16(dst, len);
+
return dst + 2;
}
@@ -64,19 +194,34 @@ static u8 *write_match(u8 *dst, u8 **nib, u32 dist, u32 len)
return dst + 4;
}
-static u8 *write_literals(u8 *dst, const u8 *dst_end, const u8 *src, size_t count,
- struct lz77_flags *flags)
+static __always_inline void lz77_copy(u8 *dst, const u8 *src, size_t count)
{
- const u8 *end = src + count;
+ while (count > 8) {
+ *(u64 *)dst = *(u64 *)src;
+ count -= 8;
+ src += 8;
+ dst += 8;
+ }
- while (src < end) {
- size_t c = lz77_min(count, 32 - flags->count);
+ while (count > 4) {
+ *(u32 *)dst = *(u32 *)src;
+ count -= 4;
+ src += 4;
+ dst += 4;
+ }
- if (dst + c >= dst_end)
- return ERR_PTR(-EFAULT);
+ if (count)
+ memcpy(dst, src, count);
+}
+
+static u8 *lz77_write_literals(u8 *dst, const u8 *src, u32 count, struct lz77_flag *flags)
+{
+ const u8 *end = src + count;
+
+ while (likely(src < end)) {
+ u32 c = umin(count, 32 - flags->count);
- if (lz77_copy(dst, src, c))
- return ERR_PTR(-EFAULT);
+ lz77_copy(dst, src, c);
dst += c;
src += c;
@@ -95,90 +240,58 @@ static u8 *write_literals(u8 *dst, const u8 *dst_end, const u8 *src, size_t coun
return dst;
}
-static __always_inline bool is_valid_match(const u32 dist, const u32 len)
-{
- return (dist >= LZ77_MATCH_MIN_DIST && dist < LZ77_MATCH_MAX_DIST) &&
- (len >= LZ77_MATCH_MIN_LEN && len < LZ77_MATCH_MAX_LEN);
-}
-
-static __always_inline const u8 *find_match(u32 *htable, const u8 *base, const u8 *cur,
- const u8 *end, u32 *best_len)
-{
- const u8 *match;
- u32 hash;
- size_t offset;
-
- hash = hash3(cur);
- offset = cur - base;
-
- if (htable[hash] >= offset)
- return cur;
-
- match = base + htable[hash];
- *best_len = lz77_match(match, cur, end);
- if (is_valid_match(cur - match, *best_len))
- return match;
-
- return cur;
-}
-
-int lz77_compress(const u8 *src, size_t src_len, u8 *dst, size_t *dst_len)
+noinline int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen)
{
- const u8 *srcp, *src_end, *anchor;
- struct lz77_flags flags = { 0 };
- u8 *dstp, *dst_end, *nib;
- u32 *htable;
+ const u8 *srcp, *end, *anchor;
+ struct lz77_flag flags = { 0 };
+ u8 *dstp, *nib;
+ u64 *htable;
int ret;
srcp = src;
anchor = srcp;
- src_end = src + src_len;
+ end = src + slen;
dstp = dst;
- dst_end = dst + *dst_len;
- flags.pos = dstp;
nib = NULL;
- memset(dstp, 0, *dst_len);
+ /* The output buffer starts with a 4-byte flags word. */
+ flags.pos = dstp;
dstp += 4;
- htable = kvcalloc(LZ77_HASH_SIZE, sizeof(u32), GFP_KERNEL);
+ htable = kvcalloc(LZ77_HASH_SIZE, sizeof(*htable), GFP_KERNEL);
if (!htable)
return -ENOMEM;
- /* fill hashtable with invalid offsets */
- memset(htable, 0xff, LZ77_HASH_SIZE * sizeof(u32));
+ /* Main loop. */
+ while (likely(srcp < end)) {
+ u32 offset, dist, len = 0;
+ u64 hash;
- /* from here on, any error is because @dst_len reached >= @src_len */
- ret = -EMSGSIZE;
+ while (likely(srcp + LZ77_MATCH_MIN_LEN < end)) {
+ offset = srcp - src;
+ hash = lz77_hash_bytes(srcp);
- /* main loop */
- while (srcp < src_end) {
- u32 hash, dist, len;
- const u8 *match;
+ dist = offset - htable[hash];
+ if (dist >= LZ77_MATCH_MIN_DIST && dist < LZ77_MATCH_MAX_DIST)
+ len = lz77_match_len(src + htable[hash], srcp, end);
- while (srcp + 3 < src_end) {
- len = LZ77_MATCH_MIN_LEN - 1;
- match = find_match(htable, src, srcp, src_end, &len);
- hash = hash3(srcp);
- htable[hash] = srcp - src;
+ htable[hash] = offset;
- if (likely(match < srcp)) {
- dist = srcp - match;
+ if (len >= LZ77_MATCH_MIN_LEN)
break;
- }
+ dist = 0;
+ len = 0;
srcp++;
}
- dstp = write_literals(dstp, dst_end, anchor, srcp - anchor, &flags);
- if (IS_ERR(dstp))
- goto err_free;
+ dstp = lz77_write_literals(dstp, anchor, srcp - anchor, &flags);
- if (srcp + 3 >= src_end)
+ if (unlikely(srcp + LZ77_MATCH_MIN_LEN >= end))
goto leftovers;
- dstp = write_match(dstp, &nib, dist, len);
+ dstp = lz77_write_match(dstp, &nib, dist, len);
srcp += len;
anchor = srcp;
@@ -192,19 +305,19 @@ int lz77_compress(const u8 *src, size_t src_len, u8 *dst, size_t *dst_len)
}
}
leftovers:
- if (srcp < src_end) {
- dstp = write_literals(dstp, dst_end, srcp, src_end - srcp, &flags);
- if (IS_ERR(dstp))
- goto err_free;
- }
+ if (srcp < end)
+ dstp = lz77_write_literals(dstp, srcp, end - srcp, &flags);
flags.val <<= (32 - flags.count);
flags.val |= (1 << (32 - flags.count)) - 1;
lz77_write32(flags.pos, flags.val);
- *dst_len = dstp - dst;
- ret = 0;
-err_free:
+ *dlen = dstp - (u8 *)dst;
+ if (*dlen < slen)
+ ret = 0;
+ else
+ ret = -EMSGSIZE;
+//out:
kvfree(htable);
return ret;
diff --git a/fs/smb/client/compress/lz77.h b/fs/smb/client/compress/lz77.h
index 0cc9c6abf840..904023f0fb4d 100644
--- a/fs/smb/client/compress/lz77.h
+++ b/fs/smb/client/compress/lz77.h
@@ -4,282 +4,17 @@
*
* Authors: Enzo Matsumiya <ematsumiya@suse.de>
*
- * Definitions and optmized helpers for LZ77 compression.
+ * Implementation of the LZ77 "plain" compression algorithm, as per MS-XCA spec.
*/
#ifndef _SMB_COMPRESS_LZ77_H
#define _SMB_COMPRESS_LZ77_H
-#ifdef CONFIG_CIFS_COMPRESSION
-#include <asm/ptrace.h>
#include <linux/kernel.h>
-#include <linux/string.h>
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-#include <asm-generic/unaligned.h>
-#endif
-#define LZ77_HASH_LOG 13
-#define LZ77_HASH_SIZE (1 << LZ77_HASH_LOG)
-#define LZ77_HASH_MASK lz77_hash_mask(LZ77_HASH_LOG)
-
-/* We can increase this for better compression (but worse performance). */
-#define LZ77_MATCH_MIN_LEN 3
-/* From MS-XCA, but it's arbitrarily chosen. */
-#define LZ77_MATCH_MAX_LEN S32_MAX
-/*
- * Check this to ensure we don't match the current position, which would
- * end up doing a verbatim copy of the input, and actually overflowing
- * the output buffer because of the encoded metadata.
- */
-#define LZ77_MATCH_MIN_DIST 1
-/* How far back in the buffer can we try to find a match (i.e. window size) */
-#define LZ77_MATCH_MAX_DIST 8192
-
-#define LZ77_STEPSIZE_16 sizeof(u16)
-#define LZ77_STEPSIZE_32 sizeof(u32)
-#define LZ77_STEPSIZE_64 sizeof(u64)
-
-struct lz77_flags {
- u8 *pos;
- size_t count;
- long val;
-};
-
-static __always_inline u32 lz77_hash_mask(const unsigned int log2)
-{
- return ((1 << log2) - 1);
-}
-
-static __always_inline u32 lz77_hash64(const u64 v, const unsigned int log2)
-{
- const u64 prime5bytes = 889523592379ULL;
-
- return (u32)(((v << 24) * prime5bytes) >> (64 - log2));
-}
-
-static __always_inline u32 lz77_hash32(const u32 v, const unsigned int log2)
-{
- return ((v * 2654435769LL) >> (32 - log2)) & lz77_hash_mask(log2);
-}
-
-static __always_inline u32 lz77_log2(unsigned int x)
-{
- return x ? ((u32)(31 - __builtin_clz(x))) : 0;
-}
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-static __always_inline u8 lz77_read8(const void *ptr)
-{
- return *(u8 *)ptr;
-}
-
-static __always_inline u16 lz77_read16(const void *ptr)
-{
- return *(u16 *)ptr;
-}
-
-static __always_inline u32 lz77_read32(const void *ptr)
-{
- return *(u32 *)ptr;
-}
-
-static __always_inline u64 lz77_read64(const void *ptr)
-{
- return *(u64 *)ptr;
-}
-
-static __always_inline void lz77_write8(void *ptr, const u8 v)
-{
- *(u8 *)ptr = v;
-}
-
-static __always_inline void lz77_write16(void *ptr, const u16 v)
-{
- *(u16 *)ptr = v;
-}
-
-static __always_inline void lz77_write32(void *ptr, const u32 v)
+static __always_inline size_t lz77_compress_mem_required(size_t len)
{
- *(u32 *)ptr = v;
-}
-
-static __always_inline void lz77_write64(void *ptr, const u64 v)
-{
- *(u64 *)ptr = v;
-}
-
-static __always_inline void lz77_write_ptr16(void *ptr, const void *vp)
-{
- *(u16 *)ptr = *(const u16 *)vp;
-}
-
-static __always_inline void lz77_write_ptr32(void *ptr, const void *vp)
-{
- *(u32 *)ptr = *(const u32 *)vp;
-}
-
-static __always_inline void lz77_write_ptr64(void *ptr, const void *vp)
-{
- *(u64 *)ptr = *(const u64 *)vp;
-}
-
-static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count)
-{
- return copy_from_kernel_nofault(dst, src, count);
-}
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-static __always_inline u8 lz77_read8(const void *ptr)
-{
- return get_unaligned((u8 *)ptr);
-}
-
-static __always_inline u16 lz77_read16(const void *ptr)
-{
- return lz77_read8(ptr) | (lz77_read8(ptr + 1) << 8);
-}
-
-static __always_inline u32 lz77_read32(const void *ptr)
-{
- return lz77_read16(ptr) | (lz77_read16(ptr + 2) << 16);
-}
-
-static __always_inline u64 lz77_read64(const void *ptr)
-{
- return lz77_read32(ptr) | ((u64)lz77_read32(ptr + 4) << 32);
-}
-
-static __always_inline void lz77_write8(void *ptr, const u8 v)
-{
- put_unaligned(v, (u8 *)ptr);
-}
-
-static __always_inline void lz77_write16(void *ptr, const u16 v)
-{
- lz77_write8(ptr, v & 0xff);
- lz77_write8(ptr + 1, (v >> 8) & 0xff);
-}
-
-static __always_inline void lz77_write32(void *ptr, const u32 v)
-{
- lz77_write16(ptr, v & 0xffff);
- lz77_write16(ptr + 2, (v >> 16) & 0xffff);
-}
-
-static __always_inline void lz77_write64(void *ptr, const u64 v)
-{
- lz77_write32(ptr, v & 0xffffffff);
- lz77_write32(ptr + 4, (v >> 32) & 0xffffffff);
-}
-
-static __always_inline void lz77_write_ptr16(void *ptr, const void *vp)
-{
- const u16 v = lz77_read16(vp);
-
- lz77_write16(ptr, v);
-}
-
-static __always_inline void lz77_write_ptr32(void *ptr, const void *vp)
-{
- const u32 v = lz77_read32(vp);
-
- lz77_write32(ptr, v);
-}
-
-static __always_inline void lz77_write_ptr64(void *ptr, const void *vp)
-{
- const u64 v = lz77_read64(vp);
-
- lz77_write64(ptr, v);
-}
-static __always_inline long lz77_copy(u8 *dst, const u8 *src, size_t count)
-{
- memcpy(dst, src, count);
- return 0;
-}
-#endif /* !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-static __always_inline unsigned int __count_common_bytes(const unsigned long diff)
-{
-#ifdef __has_builtin
-# if __has_builtin(__builtin_ctzll)
- return (unsigned int)__builtin_ctzll(diff) >> 3;
-# endif
-#else
- /* count trailing zeroes */
- unsigned long bits = 0, i, z = 0;
-
- bits |= diff;
- for (i = 0; i < 64; i++) {
- if (bits[i])
- break;
- z++;
- }
-
- return (unsigned int)z >> 3;
-#endif
-}
-
-static __always_inline size_t lz77_match(const u8 *match, const u8 *cur, const u8 *end)
-{
- const u8 *start = cur;
-
- if (cur == match)
- return 0;
-
- if (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) {
- u64 const diff = lz77_read64(cur) ^ lz77_read64(match);
-
- if (!diff) {
- cur += LZ77_STEPSIZE_64;
- match += LZ77_STEPSIZE_64;
- } else {
- return __count_common_bytes(diff);
- }
- }
-
- while (likely(cur < end - (LZ77_STEPSIZE_64 - 1))) {
- u64 const diff = lz77_read64(cur) ^ lz77_read64(match);
-
- if (!diff) {
- cur += LZ77_STEPSIZE_64;
- match += LZ77_STEPSIZE_64;
- continue;
- }
-
- cur += __count_common_bytes(diff);
- return (size_t)(cur - start);
- }
-
- if (cur < end - 3 && !(lz77_read32(cur) ^ lz77_read32(match))) {
- cur += LZ77_STEPSIZE_32;
- match += LZ77_STEPSIZE_32;
- }
-
- if (cur < end - 1 && lz77_read16(cur) == lz77_read16(match)) {
- cur += LZ77_STEPSIZE_16;
- match += LZ77_STEPSIZE_16;
- }
-
- if (cur < end && *cur == *match)
- cur++;
-
- return (size_t)(cur - start);
-}
-
-static __always_inline unsigned long lz77_max(unsigned long a, unsigned long b)
-{
- int m = (a < b) - 1;
-
- return (a & m) | (b & ~m);
-}
-
-static __always_inline unsigned long lz77_min(unsigned long a, unsigned long b)
-{
- int m = (a > b) - 1;
-
- return (a & m) | (b & ~m);
+ return len + (len >> 3);
}
-int lz77_compress(const u8 *src, size_t src_len, u8 *dst, size_t *dst_len);
-/* when CONFIG_CIFS_COMPRESSION not set lz77_compress() is not called */
-#endif /* !CONFIG_CIFS_COMPRESSION */
+int lz77_compress(const u8 *src, u32 slen, u8 *dst, u32 *dlen);
#endif /* _SMB_COMPRESS_LZ77_H */
diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c
index fd5a85d43759..3d5416e7c07d 100644
--- a/fs/smb/client/transport.c
+++ b/fs/smb/client/transport.c
@@ -433,8 +433,17 @@ smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
struct kvec *iov;
int rc;
- if (flags & CIFS_COMPRESS_REQ)