author    Enzo Matsumiya <ematsumiya@suse.de>  2024-05-21 16:07:50 -0300
committer Enzo Matsumiya <ematsumiya@suse.de>  2024-05-21 16:07:50 -0300
commit    813f0f07cc264107457d9ec8b0b74243a6ccb08e
tree      e7c72896829020bda20eda35588a3bb562789c48
parent    04597d141a44415c1e2220b5f821c23f02efa00e
smb: client: add splice support (branch: smb-compression-splice)
Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
-rw-r--r--  fs/smb/client/Kconfig    |  12
-rw-r--r--  fs/smb/client/cifsfs.c   |  34
-rw-r--r--  fs/smb/client/compress.c | 371
-rw-r--r--  fs/smb/client/splice.c   | 225
-rw-r--r--  fs/smb/client/splice.h   |  53
5 files changed, 693 insertions(+), 2 deletions(-)
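
For context (not part of the patch): once splice_read/splice_write are wired up, user space can move file data on a CIFS mount through a pipe without an intermediate user-space buffer. A minimal illustrative sketch follows; the paths are placeholders and the program is an assumption about typical usage, not something shipped with this series.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder paths: the source file lives on a CIFS mount. */
	int in = open("/mnt/cifs/file.bin", O_RDONLY);
	int out = open("/tmp/file.copy", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int pfd[2];
	ssize_t n;

	if (in < 0 || out < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	/* file -> pipe goes through ->splice_read(); pipe -> file through ->splice_write() */
	while ((n = splice(in, NULL, pfd[1], NULL, 64 * 1024, 0)) > 0) {
		while (n > 0) {
			ssize_t w = splice(pfd[0], NULL, out, NULL, n, 0);

			if (w <= 0) {
				perror("splice out");
				return 1;
			}
			n -= w;
		}
	}
	if (n < 0)
		perror("splice in");

	return n < 0;
}

With CONFIG_CIFS_SPLICE=n the same program still works; it simply goes through the generic helpers that cifs already registers in its file_operations.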
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
index 9f05f94e265a..7676402ce805 100644
--- a/fs/smb/client/Kconfig
+++ b/fs/smb/client/Kconfig
@@ -217,4 +217,16 @@ config CIFS_COMPRESSION
Say Y here if you want SMB traffic to be compressed.
If unsure, say N.
+config CIFS_SPLICE
+ bool "Enable splice(2) support (EXPERIMENTAL)"
+ depends on CIFS
+ default n
+ help
+ This enables support for splice(2) operations on SMB mounts.
+
+ Requires an up-to-date cifs-utils install.
+
+ Say Y here if you want SMB mounts to support splice.
+ If unsure, say N.
+
endif
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 6de2ea59e644..939fa5b55e45 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1541,8 +1541,13 @@ const struct file_operations cifs_file_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1561,8 +1566,13 @@ const struct file_operations cifs_file_strict_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1581,8 +1591,13 @@ const struct file_operations cifs_file_direct_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = copy_splice_read,
+#ifndef CONFIG_CIFS_SPLICE
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.remap_file_range = cifs_remap_file_range,
@@ -1599,8 +1614,13 @@ const struct file_operations cifs_file_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1617,8 +1637,13 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1635,8 +1660,13 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = copy_splice_read,
+#ifndef CONFIG_CIFS_SPLICE
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.remap_file_range = cifs_remap_file_range,
diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c
index af14508c2b3b..17baa9ebc639 100644
--- a/fs/smb/client/compress.c
+++ b/fs/smb/client/compress.c
@@ -23,6 +23,374 @@
#include "compress/lz77.h"
#include "compress.h"
+#define SAMPLING_READ_SIZE (16)
+#define SAMPLING_INTERVAL (256)
+#define BUCKET_SIZE (256)
+/*
+ * The size of the sample is based on a statistical sampling rule of thumb.
+ * The common way is to perform sampling tests as long as the number of
+ * elements in each cell is at least 5.
+ *
+ * Instead of 5, we choose 32 to obtain more accurate results.
+ * If the data contain the maximum number of symbols, which is 256, we obtain
+ * a sample size bound of 8192.
+ *
+ * Here the data range is a single LZ77 window (8192 bytes), so the sample is
+ * at most 512 bytes: 16 consecutive bytes from up to 32 locations.
+ */
+#define MAX_SAMPLE_SIZE (8192 * SAMPLING_READ_SIZE / SAMPLING_INTERVAL) /* 8192 == LZ77 window size */
+
+struct bucket_item {
+ size_t count;
+};
+
+struct heuristic_ctx {
+ /* Partial copy of input data */
+ const u8 *sample;
+ size_t sample_size;
+
+ /*
+ * Buckets store counters for each byte value.
+ *
+ * For statistical analysis of the input data we consider bytes that form a
+ * Galois field of 256 objects. Each object has an attribute count, i.e. how
+ * many times the object appeared in the sample.
+ */
+ struct bucket_item bucket[BUCKET_SIZE];
+ struct bucket_item aux_bucket[BUCKET_SIZE];
+
+ struct list_head list;
+};
+
+/*
+ * Shannon Entropy calculation.
+ *
+ * Pure byte distribution analysis fails to determine compressibility of data.
+ * Try calculating entropy to estimate the average minimum number of bits
+ * needed to encode the sampled data.
+ *
+ * For convenience, return the percentage of needed bits, instead of amount of
+ * bits directly.
+ *
+ * @ENTROPY_LEVEL_OK - below this threshold the sample has low byte entropy
+ * and is compressible with high probability
+ *
+ * @ENTROPY_LEVEL_HIGH - above this threshold the data are not compressible
+ * with high probability
+ *
+ * Because ilog2() decreases precision, the levels are lowered by 5 to
+ * compensate.
+ */
+#define ENTROPY_LEVEL_OK 65
+#define ENTROPY_LEVEL_HIGH 80
+
+/*
+ * For increased precision in the shannon_entropy() calculation, compute
+ * pow(n, M) before taking the log to preserve more fractional digits:
+ *
+ * - maximum int bit length is 64
+ * - ilog2(MAX_SAMPLE_SIZE) -> 9
+ * - 9 * 4 = 36 < 64 -> M = 4 is safe
+ *
+ * So use pow(n, 4).
+ */
+static inline u32 ilog2_w(u64 n)
+{
+ return ilog2(n * n * n * n);
+}
+
+static u32 shannon_entropy(struct heuristic_ctx *ctx)
+{
+ const size_t max = 8 * ilog2_w(2);
+ size_t i, p, p_base, sz_base, sum = 0;
+
+ sz_base = ilog2_w(ctx->sample_size);
+
+ for (i = 0; i < 256 && ctx->bucket[i].count > 0; i++) {
+ p = ctx->bucket[i].count;
+ p_base = ilog2_w(p);
+ sum += p * (sz_base - p_base);
+ }
+
+ sum /= ctx->sample_size;
+
+ return sum * 100 / max;
+}
+
+#define RADIX_BASE 4U
+#define COUNTERS_SIZE (1U << RADIX_BASE)
+
+static __always_inline u8 get4bits(u64 num, int shift)
+{
+ /* Reverse order */
+ return ((COUNTERS_SIZE - 1) - ((num >> shift) % COUNTERS_SIZE));
+}
+
+/*
+ * Use 4 bits as radix base
+ * Use 16 u32 counters for calculating new position in buf array
+ *
+ * @array - array that will be sorted
+ * @aux - buffer array to store sorting results
+ * must be equal in size to @array
+ * @num - array size
+ */
+static void radix_sort(struct bucket_item *array, struct bucket_item *aux, int num)
+{
+ size_t buf_num, max_num, addr, new_addr, counters[COUNTERS_SIZE];
+ int bitlen, shift, i;
+
+ /*
+ * Try to avoid useless loop iterations for small numbers stored in big
+ * counters. Example: 48 33 4 ... in a 64-bit array
+ */
+ max_num = array[0].count;
+ for (i = 1; i < num; i++) {
+ buf_num = array[i].count;
+
+ if (buf_num > max_num)
+ max_num = buf_num;
+ }
+
+ buf_num = ilog2(max_num);
+ bitlen = ALIGN(buf_num, RADIX_BASE * 2);
+
+ shift = 0;
+ while (shift < bitlen) {
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i++) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ aux[new_addr] = array[i];
+ }
+
+ shift += RADIX_BASE;
+
+ /*
+ * Normal radix expects to move data from a temporary array, to
+ * the main one. But that requires some CPU time. Avoid that
+ * by doing another sort iteration to original array instead of
+ * memcpy()
+ */
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i++) {
+ buf_num = aux[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = aux[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ array[new_addr] = aux[i];
+ }
+
+ shift += RADIX_BASE;
+ }
+}
+
+/*
+ * Count how many bytes cover 90% of the sample.
+ *
+ * There are several types of structured binary data that use nearly all byte
+ * values. The distribution can be uniform and counts in all buckets will be
+ * nearly the same (e.g. encrypted data); such data are unlikely to be
+ * compressible.
+ *
+ * Another possibility is a normal (Gaussian) distribution, where the data
+ * could be potentially compressible, but we have to take a few more steps to
+ * decide how much.
+ *
+ * @BYTE_COVERAGE_LOW - most byte values repeat frequently, so a compression
+ * algorithm can easily exploit that
+ * @BYTE_COVERAGE_HIGH - the data have a uniform distribution and are not
+ * compressible with high probability
+ */
+#define BYTE_COVERAGE_LOW 64
+#define BYTE_COVERAGE_HIGH 200
+
+static int byte_coverage(struct heuristic_ctx *ctx)
+{
+ const size_t threshold = ctx->sample_size * 90 / 100;
+ struct bucket_item *bkt = &ctx->bucket[0];
+ size_t sum = 0;
+ int i;
+
+ /* Sort in reverse order */
+ radix_sort(ctx->bucket, ctx->aux_bucket, BUCKET_SIZE);
+
+ for (i = 0; i < BYTE_COVERAGE_LOW; i++)
+ sum += bkt[i].count;
+
+ if (sum > threshold)
+ return i;
+
+ for (; i < BYTE_COVERAGE_HIGH && bkt[i].count > 0; i++) {
+ sum += bkt[i].count;
+ if (sum > threshold)
+ break;
+ }
+
+ return i;
+}
+
+/*
+ * Count ASCII bytes in buckets.
+ *
+ * This heuristic can detect textual data (configs, xml, json, html, etc.),
+ * because in most text-like data the byte set is restricted to a limited
+ * number of possible characters, and that restriction in most cases makes
+ * the data easy to compress.
+ *
+ * @ASCII_COUNT_THRESHOLD - consider all data within this byte set size:
+ * less - compressible
+ * more - needs additional analysis
+ */
+#define ASCII_COUNT_THRESHOLD 64
+
+static __always_inline u32 ascii_count(const struct heuristic_ctx *ctx)
+{
+ size_t count = 0;
+ int i;
+
+ for (i = 0; i < ASCII_COUNT_THRESHOLD; i++)
+ if (ctx->bucket[i].count > 0)
+ count++;
+
+ /*
+ * Continue collecting count of byte values in buckets. If the byte
+ * set size is bigger then the threshold, it's pointless to continue,
+ * the detection technique would fail for this type of data.
+ */
+ for (; i < 256; i++) {
+ if (ctx->bucket[i].count > 0) {
+ count++;
+ if (count > ASCII_COUNT_THRESHOLD)
+ break;
+ }
+ }
+
+ return count;
+}
+
+static __always_inline struct heuristic_ctx *heuristic_init(const u8 *buf, size_t len)
+{
+ struct heuristic_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ int i = 0, s = 0;
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->sample = kzalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
+ if (!ctx->sample) {
+ kfree(ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (len > MAX_SAMPLE_SIZE)
+ len = MAX_SAMPLE_SIZE;
+
+ /* Copy SAMPLING_READ_SIZE bytes from every SAMPLING_INTERVAL-th offset. */
+ while (i + SAMPLING_READ_SIZE <= len) {
+ memcpy((void *)&ctx->sample[s], &buf[i], SAMPLING_READ_SIZE);
+ i += SAMPLING_INTERVAL;
+ s += SAMPLING_READ_SIZE;
+ }
+
+ ctx->sample_size = s;
+
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+static __always_inline bool sample_repeated_patterns(struct heuristic_ctx *ctx)
+{
+ const size_t half = ctx->sample_size / 2;
+
+ return (memcmp(&ctx->sample[0], &ctx->sample[half], half) == 0);
+}
+
+static int is_compressible(const void *buf, size_t len)
+{
+ struct heuristic_ctx *ctx;
+ int i, ret = 0;
+ u8 byte;
+
+ ctx = heuristic_init(buf, len);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ /*
+ * Go from low-hanging fruit (clearly compressible) to "needs more analysis" (likely incompressible).
+ */
+
+ ret = 1;
+
+ if (sample_repeated_patterns(ctx))
+ goto out;
+
+ for (i = 0; i < ctx->sample_size; i++) {
+ byte = ctx->sample[i];
+ ctx->bucket[byte].count++;
+ }
+
+ if (ascii_count(ctx) < ASCII_COUNT_THRESHOLD)
+ goto out;
+
+ i = byte_coverage(ctx);
+ if (i <= BYTE_COVERAGE_LOW)
+ goto out;
+
+ if (i >= BYTE_COVERAGE_HIGH) {
+ ret = 0;
+ goto out;
+ }
+
+ i = shannon_entropy(ctx);
+ if (i <= ENTROPY_LEVEL_OK)
+ goto out;
+
+ /*
+ * For levels below ENTROPY_LEVEL_HIGH, additional analysis would be
+ * needed to give the green light to compression.
+ *
+ * For now just assume that compression at that level is not worth the
+ * resources because:
+ *
+ * 1. the request can simply be sent uncompressed
+ *
+ * 2. the data might turn out to be hardly compressible, e.g. ~150 byte
+ * values with every bucket counter at ~54. The heuristic would be
+ * confused; this can happen when the data have internal repeated
+ * patterns like "abbacbbc...". Detecting that would require analyzing
+ * pairs of bytes, which is too costly.
+ */
+ if (i < ENTROPY_LEVEL_HIGH)
+ ret = 1;
+ else
+ ret = 0;
+out:
+ kvfree(ctx->sample);
+ kfree(ctx);
+
+ return ret;
+}
+
static void pattern_scan(const u8 *src, size_t src_len,
struct smb2_compression_pattern_v1 *fwd,
struct smb2_compression_pattern_v1 *bwd)
@@ -204,6 +572,8 @@ int smb_compress(struct smb_rqst *src_rq, struct smb_rqst *dst_rq, bool chained)
goto err_free;
}
+ pr_err("%s: is compressible %d\n", __func__, is_compressible(src, data_len));
+
dst_rq->rq_iov->iov_base = kvzalloc(SMB_COMPRESS_HDR_LEN + buf_len + data_len, GFP_KERNEL);
if (!dst_rq->rq_iov->iov_base)
goto err_free;
@@ -227,6 +597,7 @@ int smb_compress(struct smb_rqst *src_rq, struct smb_rqst *dst_rq, bool chained)
memcpy(dst + SMB_COMPRESS_HDR_LEN, src_rq->rq_iov->iov_base, buf_len);
ret = compress_data(src, data_len, dst + SMB_COMPRESS_HDR_LEN + buf_len, &data_len, chained);
+ pr_err("%s: compress ret %d\n", __func__, ret);
err_free:
kvfree(src);
if (!ret) {
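
To make the heuristic above easier to follow, here is a small stand-alone user-space sketch of the same pipeline: sample 16 bytes every 256 bytes, count byte values into buckets, then estimate Shannon entropy as a percentage of the 8-bits-per-byte maximum using the ilog2(n*n*n*n) fixed-point trick. The constants mirror the patch but the helpers are reimplemented here; this is an illustration only, not the kernel code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SAMPLING_READ_SIZE 16
#define SAMPLING_INTERVAL  256
#define MAX_SAMPLE_SIZE    (8192 * SAMPLING_READ_SIZE / SAMPLING_INTERVAL)

/* Plain integer log2, standing in for the kernel's ilog2(). */
static unsigned int ilog2_u64(uint64_t n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

/* Take log2 of pow(n, 4) to keep 4x the precision while staying in integers. */
static unsigned int ilog2_w(uint64_t n)
{
	return ilog2_u64(n * n * n * n);
}

/* Estimated entropy as a percentage of the 8 bits/byte maximum. */
static unsigned int shannon_entropy_pct(const size_t bucket[256], size_t sample_size)
{
	const size_t max = 8 * ilog2_w(2); /* == 32: 8 bits/byte, scaled by 4 */
	size_t sz_base = ilog2_w(sample_size);
	size_t sum = 0;
	int i;

	for (i = 0; i < 256; i++) {
		if (!bucket[i])
			continue;
		/* count * (log2(sample_size) - log2(count)), scaled by 4 */
		sum += bucket[i] * (sz_base - ilog2_w(bucket[i]));
	}

	sum /= sample_size;
	return sum * 100 / max;
}

int main(void)
{
	uint8_t data[8192], sample[MAX_SAMPLE_SIZE];
	size_t bucket[256] = { 0 };
	size_t i, s = 0;

	/* Fake input: first half repetitive, second half pseudo-random. */
	for (i = 0; i < sizeof(data); i++)
		data[i] = (i < sizeof(data) / 2) ? 'A' : (uint8_t)((i * 2654435761u) >> 24);

	/* Sample 16 consecutive bytes at every 256-byte interval, as in the patch. */
	for (i = 0; i + SAMPLING_READ_SIZE <= sizeof(data); i += SAMPLING_INTERVAL) {
		memcpy(&sample[s], &data[i], SAMPLING_READ_SIZE);
		s += SAMPLING_READ_SIZE;
	}

	/* One bucket per byte value, as in struct heuristic_ctx. */
	for (i = 0; i < s; i++)
		bucket[sample[i]]++;

	printf("sample size: %zu, entropy: %u%%\n", s, shannon_entropy_pct(bucket, s));
	return 0;
}

Low percentages mean few bits are needed per byte and compression is worth attempting; values at or above ENTROPY_LEVEL_HIGH suggest skipping it.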
diff --git a/fs/smb/client/splice.c b/fs/smb/client/splice.c
new file mode 100644
index 000000000000..1125fe2868d5
--- /dev/null
+++ b/fs/smb/client/splice.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) SUSE 2024
+ * Author(s): Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Splice support for cifs.ko
+ */
+#include <linux/splice.h>
+
+#include "splice.h"
+
+ssize_t cifs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ pr_err("%s: start pos %lld, len %zu, flags 0x%x\n", __func__, ppos ? *ppos : 0, len, flags);
+ /* For now just delegate to the generic pagecache helper. */
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+#if 0
+ struct folio_batch fbatch;
+ struct kiocb iocb;
+ size_t total_spliced = 0, used, npages;
+ loff_t isize, end_offset;
+ bool writably_mapped;
+ int i, error = 0;
+
+ if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))
+ return 0;
+
+ init_sync_kiocb(&iocb, in);
+ iocb.ki_pos = *ppos;
+
+ /* Work out how much data we can actually add into the pipe */
+ used = pipe_occupancy(pipe->head, pipe->tail);
+ npages = max_t(ssize_t, pipe->max_usage - used, 0);
+ len = min_t(size_t, len, npages * PAGE_SIZE);
+
+ folio_batch_init(&fbatch);
+
+ do {
+ cond_resched();
+
+ if (*ppos >= i_size_read(in->f_mapping->host))
+ break;
+
+ iocb.ki_pos = *ppos;
+ error = filemap_get_pages(&iocb, len, &fbatch, true);
+ if (error < 0)
+ break;
+
+ /*
+ * i_size must be checked after we know the pages are Uptodate.
+ *
+ * Checking i_size after the check allows us to calculate
+ * the correct value for "nr", which means the zero-filled
+ * part of the page is not copied back to userspace (unless
+ * another truncate extends the file - this is desired though).
+ */
+ isize = i_size_read(in->f_mapping->host);
+ if (unlikely(*ppos >= isize))
+ break;
+ end_offset = min_t(loff_t, isize, *ppos + len);
+
+ /*
+ * Once we start copying data, we don't want to be touching any
+ * cachelines that might be contended:
+ */
+ writably_mapped = mapping_writably_mapped(in->f_mapping);
+
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ size_t n;
+
+ if (folio_pos(folio) >= end_offset)
+ goto out;
+ folio_mark_accessed(folio);
+
+ /*
+ * If users can be writing to this folio using arbitrary
+ * virtual addresses, take care of potential aliasing
+ * before reading the folio on the kernel side.
+ */
+ if (writably_mapped)
+ flush_dcache_folio(folio);
+
+ n = min_t(loff_t, len, isize - *ppos);
+ n = splice_folio_into_pipe(pipe, folio, *ppos, n);
+ if (!n)
+ goto out;
+ len -= n;
+ total_spliced += n;
+ *ppos += n;
+ in->f_ra.prev_pos = *ppos;
+ if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ goto out;
+ }
+
+ folio_batch_release(&fbatch);
+ } while (len);
+
+out:
+ folio_batch_release(&fbatch);
+ file_accessed(in);
+
+ return total_spliced ? total_spliced : error;
+#endif
+}
+
+ssize_t
+cifs_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ pr_err("%s: start fname %s, pos %lld, len %zu, flags 0x%x\n", __func__, out->f_path.dentry->d_name.name, ppos ? *ppos : 0, len, flags);
+ /* For now just delegate to the generic ->write_iter-based helper. */
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
+#if 0
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
+ int nbufs = pipe->max_usage;
+ struct bio_vec *array;
+ ssize_t ret;
+
+ if (!out->f_op->write_iter)
+ return -EINVAL;
+
+ array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL);
+ if (unlikely(!array))
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+
+ splice_from_pipe_begin(&sd);
+ while (sd.total_len) {
+ struct kiocb kiocb;
+ struct iov_iter from;
+ unsigned int head, tail, mask;
+ size_t left;
+ int n;
+
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
+
+ if (unlikely(nbufs < pipe->max_usage)) {
+ kfree(array);
+ nbufs = pipe->max_usage;
+ array = kcalloc(nbufs, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ if (!array) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ head = pipe->head;
+ tail = pipe->tail;
+ mask = pipe->ring_size - 1;
+
+ /* build the vector */
+ left = sd.total_len;
+ for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+ size_t this_len = buf->len;
+
+ /* zero-length bvecs are not supported, skip them */
+ if (!this_len)
+ continue;
+ this_len = min(this_len, left);
+
+ ret = pipe_buf_confirm(pipe, buf);
+ if (unlikely(ret)) {
+ if (ret == -ENODATA)
+ ret = 0;
+ goto done;
+ }
+
+ bvec_set_page(&array[n], buf->page, this_len,
+ buf->offset);
+ left -= this_len;
+ n++;
+ }
+
+ iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
+ init_sync_kiocb(&kiocb, out);
+ kiocb.ki_pos = sd.pos;
+ ret = call_write_iter(out, &kiocb, &from);
+ sd.pos = kiocb.ki_pos;
+ if (ret <= 0)
+ break;
+
+ sd.num_spliced += ret;
+ sd.total_len -= ret;
+ *ppos = sd.pos;
+
+ /* dismiss the fully eaten buffers, adjust the partial one */
+ tail = pipe->tail;
+ while (ret) {
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+ if (ret >= buf->len) {
+ ret -= buf->len;
+ buf->len = 0;
+ pipe_buf_release(pipe, buf);
+ tail++;
+ pipe->tail = tail;
+ if (pipe->files)
+ sd.need_wakeup = true;
+ } else {
+ buf->offset += ret;
+ buf->len -= ret;
+ ret = 0;
+ }
+ }
+ }
+done:
+ kfree(array);
+ splice_from_pipe_end(pipe, &sd);
+
+ pipe_unlock(pipe);
+
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
+
+ return ret;
+#endif
+}
+
diff --git a/fs/smb/client/splice.h b/fs/smb/client/splice.h
new file mode 100644
index 000000000000..6c88402bb489
--- /dev/null
+++ b/fs/smb/client/splice.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) SUSE 2024
+ * Author(s): Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Splice support for cifs.ko
+ */
+#ifndef _CIFS_SPLICE_H
+#define _CIFS_SPLICE_H
+
+#include <linux/pagevec.h>
+#include <linux/fs.h>
+#include <linux/pipe_fs_i.h>
+
+/**
+ * cifs_splice_read - Splice data from a file's pagecache into a pipe
+ * @in: The file to read from
+ * @ppos: Pointer to the file position to read from
+ * @pipe: The pipe to splice into
+ * @len: The amount to splice
+ * @flags: The SPLICE_F_* flags
+ *
+ * This function gets folios from the file's pagecache and splices them into
+ * the pipe. Readahead will be called as necessary to fill more folios.
+ *
+ * Return: On success, the number of bytes read will be returned and *@ppos
+ * will be updated if appropriate; 0 will be returned if there is no more data
+ * to be read; -EAGAIN will be returned if the pipe had no space, and some
+ * other negative error code will be returned on error. A short read may occur
+ * if the pipe has insufficient space, we reach the end of the data or we hit a
+ * hole.
+ */
+ssize_t cifs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
+
+/**
+ * cifs_splice_write - splice data from a pipe to a file
+ * @pipe: pipe info
+ * @out: file to write to
+ * @ppos: position in @out
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file.
+ * This one is ->write_iter-based.
+ *
+ */
+ssize_t cifs_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags);
+#endif /* !_CIFS_SPLICE_H */
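
As a usage note for the return contract documented for cifs_splice_read() above (positive and possibly short counts, 0 at EOF, -EAGAIN when the pipe has no room): a caller typically drains the pipe and retries on EAGAIN. Below is an illustrative non-blocking user-space loop; the paths are placeholders and the whole program is an assumption about usage, not part of the patch.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Move whatever is currently queued in the pipe into the sink fd. */
static int drain(int pipe_rd, int sink)
{
	ssize_t n;

	while ((n = splice(pipe_rd, NULL, sink, NULL, 1 << 16, SPLICE_F_NONBLOCK)) > 0)
		;
	return (n < 0 && errno != EAGAIN) ? -1 : 0;
}

int main(void)
{
	/* Placeholder paths; the source file lives on a CIFS mount. */
	int fd = open("/mnt/cifs/log.txt", O_RDONLY);
	int sink = open("/dev/null", O_WRONLY);
	int pfd[2];
	ssize_t n;

	if (fd < 0 || sink < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	for (;;) {
		n = splice(fd, NULL, pfd[1], NULL, 1 << 16, SPLICE_F_NONBLOCK);
		if (n == 0)				/* EOF */
			break;
		if (n < 0 && errno == EAGAIN) {		/* pipe full: make room, retry */
			if (drain(pfd[0], sink) < 0)
				break;
			continue;
		}
		if (n < 0) {
			perror("splice in");
			break;
		}
		/* n may be a short count; just drain and keep going. */
		if (drain(pfd[0], sink) < 0)
			break;
	}
	return 0;
}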