author    Enzo Matsumiya <ematsumiya@suse.de>  2024-05-21 16:07:50 -0300
committer Enzo Matsumiya <ematsumiya@suse.de>  2024-05-21 16:07:50 -0300
commit    813f0f07cc264107457d9ec8b0b74243a6ccb08e
tree      e7c72896829020bda20eda35588a3bb562789c48
parent    04597d141a44415c1e2220b5f821c23f02efa00e
smb: client: add splice support (branch: smb-compression-splice)
Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
-rw-r--r--  fs/smb/client/Kconfig    |  12
-rw-r--r--  fs/smb/client/cifsfs.c   |  34
-rw-r--r--  fs/smb/client/compress.c | 371
-rw-r--r--  fs/smb/client/splice.c   | 225
-rw-r--r--  fs/smb/client/splice.h   |  53
5 files changed, 693 insertions(+), 2 deletions(-)
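
For context (not part of the patch): once splice_read/splice_write are wired up, user space can move file data on a CIFS mount through a pipe without an intermediate user-space buffer. A minimal illustrative sketch follows; the paths are placeholders and the program is an assumption about typical usage, not something shipped with this series.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
	/* Placeholder paths: the source file lives on a CIFS mount. */
	int in = open("/mnt/cifs/file.bin", O_RDONLY);
	int out = open("/tmp/file.copy", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	int pfd[2];
	ssize_t n;

	if (in < 0 || out < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	/* file -> pipe goes through ->splice_read(); pipe -> file through ->splice_write() */
	while ((n = splice(in, NULL, pfd[1], NULL, 64 * 1024, 0)) > 0) {
		while (n > 0) {
			ssize_t w = splice(pfd[0], NULL, out, NULL, n, 0);

			if (w <= 0) {
				perror("splice out");
				return 1;
			}
			n -= w;
		}
	}
	if (n < 0)
		perror("splice in");

	return n < 0;
}

With CONFIG_CIFS_SPLICE=n the same program still works; it simply goes through the generic helpers that cifs already registers in its file_operations.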
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
index 9f05f94e265a..7676402ce805 100644
--- a/fs/smb/client/Kconfig
+++ b/fs/smb/client/Kconfig
@@ -217,4 +217,16 @@ config CIFS_COMPRESSION
Say Y here if you want SMB traffic to be compressed.
If unsure, say N.
+config CIFS_SPLICE
+ bool "Enable splice(2) support (EXPERIMENTAL)"
+ depends on CIFS
+ default n
+ help
+ This enables support for splice(2) operations on SMB mounts.
+
+ Requires an up-to-date cifs-utils install.
+
+ Say Y here if you want SMB mounts to support splice.
+ If unsure, say N.
+
endif
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 6de2ea59e644..939fa5b55e45 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1541,8 +1541,13 @@ const struct file_operations cifs_file_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1561,8 +1566,13 @@ const struct file_operations cifs_file_strict_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1581,8 +1591,13 @@ const struct file_operations cifs_file_direct_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = copy_splice_read,
+#ifndef CONFIG_CIFS_SPLICE
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.remap_file_range = cifs_remap_file_range,
@@ -1599,8 +1614,13 @@ const struct file_operations cifs_file_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1617,8 +1637,13 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
.fsync = cifs_strict_fsync,
.flush = cifs_flush,
.mmap = cifs_file_strict_mmap,
+#ifndef CONFIG_CIFS_SPLICE
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
@@ -1635,8 +1660,13 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
.fsync = cifs_fsync,
.flush = cifs_flush,
.mmap = cifs_file_mmap,
- .splice_read = copy_splice_read,
+#ifndef CONFIG_CIFS_SPLICE
+ .splice_read = copy_splice_read,
.splice_write = iter_file_splice_write,
+#else /* CONFIG_CIFS_SPLICE */
+ .splice_read = cifs_splice_read,
+ .splice_write = cifs_splice_write,
+#endif /* CONFIG_CIFS_SPLICE */
.unlocked_ioctl = cifs_ioctl,
.copy_file_range = cifs_copy_file_range,
.remap_file_range = cifs_remap_file_range,
diff --git a/fs/smb/client/compress.c b/fs/smb/client/compress.c
index af14508c2b3b..17baa9ebc639 100644
--- a/fs/smb/client/compress.c
+++ b/fs/smb/client/compress.c
@@ -23,6 +23,374 @@
#include "compress/lz77.h"
#include "compress.h"
+#define SAMPLING_READ_SIZE (16)
+#define SAMPLING_INTERVAL (256)
+#define BUCKET_SIZE (256)
+/*
+ * The size of the sample is based on a statistical sampling rule of thumb.
+ * The common way is to perform sampling tests as long as the number of
+ * elements in each cell is at least 5.
+ *
+ * Instead of 5, we choose 32 to obtain more accurate results.
+ * If the data contain the maximum number of symbols, which is 256, we obtain
+ * a sample size bound of 8192.
+ *
+ * Here the data range is a single LZ77 window (8192 bytes), so the sample is
+ * at most 512 bytes: 16 consecutive bytes from up to 32 locations.
+ */
+#define MAX_SAMPLE_SIZE (8192 * SAMPLING_READ_SIZE / SAMPLING_INTERVAL) /* 8192 == LZ77 window size */
+
+struct bucket_item {
+ size_t count;
+};
+
+struct heuristic_ctx {
+ /* Partial copy of input data */
+ const u8 *sample;
+ size_t sample_size;
+
+ /*
+ * Buckets store counters for each byte value.
+ *
+ * For statistical analysis of the input data we consider bytes that form a
+ * Galois field of 256 objects. Each object has an attribute count, i.e. how
+ * many times the object appeared in the sample.
+ */
+ struct bucket_item bucket[BUCKET_SIZE];
+ struct bucket_item aux_bucket[BUCKET_SIZE];
+
+ struct list_head list;
+};
+
+/*
+ * Shannon Entropy calculation.
+ *
+ * Pure byte distribution analysis fails to determine compressibility of data.
+ * Try calculating entropy to estimate the average minimum number of bits
+ * needed to encode the sampled data.
+ *
+ * For convenience, return the percentage of needed bits, instead of amount of
+ * bits directly.
+ *
+ * @ENTROPY_LEVEL_OK - below this threshold the sample has low byte entropy
+ * and is compressible with high probability
+ *
+ * @ENTROPY_LEVEL_HIGH - above this threshold the data are not compressible
+ * with high probability
+ *
+ * Because ilog2() decreases precision, the levels are lowered by 5 to
+ * compensate.
+ */
+#define ENTROPY_LEVEL_OK 65
+#define ENTROPY_LEVEL_HIGH 80
+
+/*
+ * For increased precision in the shannon_entropy() calculation, compute
+ * pow(n, M) before taking the log to preserve more fractional digits:
+ *
+ * - maximum int bit length is 64
+ * - ilog2(MAX_SAMPLE_SIZE) -> 9
+ * - 9 * 4 = 36 < 64 -> M = 4 is safe
+ *
+ * So use pow(n, 4).
+ */
+static inline u32 ilog2_w(u64 n)
+{
+ return ilog2(n * n * n * n);
+}
+
+static u32 shannon_entropy(struct heuristic_ctx *ctx)
+{
+ const size_t max = 8 * ilog2_w(2);
+ size_t i, p, p_base, sz_base, sum = 0;
+
+ sz_base = ilog2_w(ctx->sample_size);
+
+ for (i = 0; i < 256 && ctx->bucket[i].count > 0; i++) {
+ p = ctx->bucket[i].count;
+ p_base = ilog2_w(p);
+ sum += p * (sz_base - p_base);
+ }
+
+ sum /= ctx->sample_size;
+
+ return sum * 100 / max;
+}
+
+#define RADIX_BASE 4U
+#define COUNTERS_SIZE (1U << RADIX_BASE)
+
+static __always_inline u8 get4bits(u64 num, int shift)
+{
+ /* Reverse order */
+ return ((COUNTERS_SIZE - 1) - ((num >> shift) % COUNTERS_SIZE));
+}
+
+/*
+ * Use 4 bits as radix base
+ * Use 16 u32 counters for calculating new position in buf array
+ *
+ * @array - array that will be sorted
+ * @aux - buffer array to store sorting results
+ * must be equal in size to @array
+ * @num - array size
+ */
+static void radix_sort(struct bucket_item *array, struct bucket_item *aux, int num)
+{
+ size_t buf_num, max_num, addr, new_addr, counters[COUNTERS_SIZE];
+ int bitlen, shift, i;
+
+ /*
+ * Try to avoid useless loop iterations for small numbers stored in big
+ * counters. Example: 48 33 4 ... in a 64-bit array
+ */
+ max_num = array[0].count;
+ for (i = 1; i < num; i++) {
+ buf_num = array[i].count;
+
+ if (buf_num > max_num)
+ max_num = buf_num;
+ }
+
+ buf_num = ilog2(max_num);
+ bitlen = ALIGN(buf_num, RADIX_BASE * 2);
+
+ shift = 0;
+ while (shift < bitlen) {
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i++) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = array[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ aux[new_addr] = array[i];
+ }
+
+ shift += RADIX_BASE;
+
+ /*
+ * Normal radix expects to move data from a temporary array, to
+ * the main one. But that requires some CPU time. Avoid that
+ * by doing another sort iteration to original array instead of
+ * memcpy()
+ */
+ memset(counters, 0, sizeof(counters));
+
+ for (i = 0; i < num; i++) {
+ buf_num = aux[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]++;
+ }
+
+ for (i = 1; i < COUNTERS_SIZE; i++)
+ counters[i] += counters[i - 1];
+
+ for (i = num - 1; i >= 0; i--) {
+ buf_num = aux[i].count;
+ addr = get4bits(buf_num, shift);
+ counters[addr]--;
+ new_addr = counters[addr];
+ array[new_addr] = aux[i];
+ }
+
+ shift += RADIX_BASE;
+ }
+}
+
+/*
+ * Count how many bytes cover 90% of the sample.
+ *
+ * There are several types of structured binary data that use nearly all byte
+ * values. The distribution can be uniform and counts in all buckets will be
+ * nearly the same (e.g. encrypted data); such data are unlikely to be
+ * compressible.
+ *
+ * Another possibility is a normal (Gaussian) distribution, where the data
+ * could be potentially compressible, but we have to take a few more steps to
+ * decide how much.
+ *
+ * @BYTE_COVERAGE_LOW - most byte values repeat frequently, so a compression
+ * algorithm can easily exploit that
+ * @BYTE_COVERAGE_HIGH - the data have a uniform distribution and are not
+ * compressible with high probability
+ */
+#define BYTE_COVERAGE_LOW 64
+#define BYTE_COVERAGE_HIGH 200
+
+static int byte_coverage(struct heuristic_ctx *ctx)
+{
+ const size_t threshold = ctx->sample_size * 90 / 100;
+ struct bucket_item *bkt = &ctx->bucket[0];
+ size_t sum = 0;
+ int i;
+
+ /* Sort in reverse order */
+ radix_sort(ctx->bucket, ctx->aux_bucket, BUCKET_SIZE);
+
+ for (i = 0; i < BYTE_COVERAGE_LOW; i++)
+ sum += bkt[i].count;
+
+ if (sum > threshold)
+ return i;
+
+ for (; i < BYTE_COVERAGE_HIGH && bkt[i].count > 0; i++) {
+ sum += bkt[i].count;
+ if (sum > threshold)
+ break;
+ }
+
+ return i;
+}
+
+/*
+ * Count ASCII bytes in buckets.
+ *
+ * This heuristic can detect textual data (configs, xml, json, html, etc.),
+ * because in most text-like data the byte set is restricted to a limited
+ * number of possible characters, and that restriction in most cases makes
+ * the data easy to compress.
+ *
+ * @ASCII_COUNT_THRESHOLD - consider all data within this byte set size:
+ * less - compressible
+ * more - needs additional analysis
+ */
+#define ASCII_COUNT_THRESHOLD 64
+
+static __always_inline u32 ascii_count(const struct heuristic_ctx *ctx)
+{
+ size_t count = 0;
+ int i;
+
+ for (i = 0; i < ASCII_COUNT_THRESHOLD; i++)
+ if (ctx->bucket[i].count > 0)
+ count++;
+
+ /*
+ * Continue collecting count of byte values in buckets. If the byte
+ * set size is bigger then the threshold, it's pointless to continue,
+ * the detection technique would fail for this type of data.
+ */
+ for (; i < 256; i++) {
+ if (ctx->bucket[i].count > 0) {
+ count++;
+ if (count > ASCII_COUNT_THRESHOLD)
+ break;
+ }
+ }
+
+ return count;
+}
+
+static __always_inline struct heuristic_ctx *heuristic_init(const u8 *buf, size_t len)
+{
+ struct heuristic_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ int i = 0, s = 0;
+
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ ctx->sample = kzalloc(MAX_SAMPLE_SIZE, GFP_KERNEL);
+ if (!ctx->sample) {
+ kfree(ctx);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (len > MAX_SAMPLE_SIZE)
+ len = MAX_SAMPLE_SIZE;
+
+ /* Copy SAMPLING_READ_SIZE bytes from every SAMPLING_INTERVAL-th offset. */
+ while (i + SAMPLING_READ_SIZE <= len) {
+ memcpy((void *)&ctx->sample[s], &buf[i], SAMPLING_READ_SIZE);
+ i += SAMPLING_INTERVAL;
+ s += SAMPLING_READ_SIZE;
+ }
+
+ ctx->sample_size = s;
+
+ INIT_LIST_HEAD(&ctx->list);
+
+ return ctx;
+}
+
+static __always_inline bool sample_repeated_patterns(struct heuristic_ctx *ctx)
+{
+ const size_t half = ctx->sample_size / 2;
+
+ return (memcmp(&ctx->sample[0], &ctx->sample[half], half) == 0);
+}
+
+static int is_compressible(const void *buf, size_t len)
+{
+ struct heuristic_ctx *ctx;
+ int i, ret = 0;
+ u8 byte;
+
+ ctx = heuristic_init(buf, len);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ /*
+ * Go from low-hanging fruit (clearly compressible) to "needs more analysis" (likely incompressible).
+ */
+
+ ret = 1;
+
+ if (sample_repeated_patterns(ctx))
+ goto out;
+
+ for (i = 0; i < ctx->sample_size; i++) {
+ byte = ctx->sample[i];
+ ctx->bucket[byte].count++;
+ }
+
+ if (ascii_count(ctx) < ASCII_COUNT_THRESHOLD)
+ goto out;
+
+ i = byte_coverage(ctx);
+ if (i <= BYTE_COVERAGE_LOW)
+ goto out;
+
+ if (i >= BYTE_COVERAGE_HIGH) {
+ ret = 0;
+ goto out;
+ }
+
+ i = shannon_entropy(ctx);
+ if (i <= ENTROPY_LEVEL_OK)
+ goto out;
+
+ /*
+ * For levels below ENTROPY_LEVEL_HIGH, additional analysis would be
+ * needed to give the green light to compression.
+ *
+ * For now just assume that compression at that level is not worth the
+ * resources because:
+ *
+ * 1. the request can simply be sent uncompressed
+ *
+ * 2. the data might turn out to be hardly compressible, e.g. ~150 byte
+ * values with every bucket counter at ~54. The heuristic would be
+ * confused; this can happen when the data have internal repeated
+ * patterns like "abbacbbc...". Detecting that would require analyzing
+ * pairs of bytes, which is too costly.
+ */
+ if (i < ENTROPY_LEVEL_HIGH)
+ ret = 1;
+ else
+ ret = 0;
+out:
+ kvfree(ctx->sample);
+ kfree(ctx);
+
+ return ret;
+}
+
static void pattern_scan(const u8 *src, size_t src_len,
struct smb2_compression_pattern_v1 *fwd,
struct smb2_compression_pattern_v1 *bwd)
@@ -204,6 +572,8 @@ int smb_compress(struct smb_rqst *src_rq, struct smb_rqst *dst_rq, bool chained)
goto err_free;
}
+ pr_err("%s: is compressible %d\n", __func__, is_compressible(src, data_len));
+
dst_rq->rq_iov->iov_base = kvzalloc(SMB_COMPRESS_HDR_LEN + buf_len + data_len, GFP_KERNEL);
if (!dst_rq->rq_iov->iov_base)
goto err_free;
@@ -227,6 +597,7 @@ int smb_compress(struct smb_rqst *src_rq, struct smb_rqst *dst_rq, bool chained)
memcpy(dst + SMB_COMPRESS_HDR_LEN, src_rq->rq_iov->iov_base, buf_len);
ret = compress_data(src, data_len, dst + SMB_COMPRESS_HDR_LEN + buf_len, &data_len, chained);
+ pr_err("%s: compress ret %d\n", __func__, ret);
err_free:
kvfree(src);
if (!ret) {
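
To make the heuristic above easier to follow, here is a small stand-alone user-space sketch of the same pipeline: sample 16 bytes every 256 bytes, count byte values into buckets, then estimate Shannon entropy as a percentage of the 8-bits-per-byte maximum using the ilog2(n*n*n*n) fixed-point trick. The constants mirror the patch but the helpers are reimplemented here; this is an illustration only, not the kernel code.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SAMPLING_READ_SIZE 16
#define SAMPLING_INTERVAL  256
#define MAX_SAMPLE_SIZE    (8192 * SAMPLING_READ_SIZE / SAMPLING_INTERVAL)

/* Plain integer log2, standing in for the kernel's ilog2(). */
static unsigned int ilog2_u64(uint64_t n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

/* Take log2 of pow(n, 4) to keep 4x the precision while staying in integers. */
static unsigned int ilog2_w(uint64_t n)
{
	return ilog2_u64(n * n * n * n);
}

/* Estimated entropy as a percentage of the 8 bits/byte maximum. */
static unsigned int shannon_entropy_pct(const size_t bucket[256], size_t sample_size)
{
	const size_t max = 8 * ilog2_w(2); /* == 32: 8 bits/byte, scaled by 4 */
	size_t sz_base = ilog2_w(sample_size);
	size_t sum = 0;
	int i;

	for (i = 0; i < 256; i++) {
		if (!bucket[i])
			continue;
		/* count * (log2(sample_size) - log2(count)), scaled by 4 */
		sum += bucket[i] * (sz_base - ilog2_w(bucket[i]));
	}

	sum /= sample_size;
	return sum * 100 / max;
}

int main(void)
{
	uint8_t data[8192], sample[MAX_SAMPLE_SIZE];
	size_t bucket[256] = { 0 };
	size_t i, s = 0;

	/* Fake input: first half repetitive, second half pseudo-random. */
	for (i = 0; i < sizeof(data); i++)
		data[i] = (i < sizeof(data) / 2) ? 'A' : (uint8_t)((i * 2654435761u) >> 24);

	/* Sample 16 consecutive bytes at every 256-byte interval, as in the patch. */
	for (i = 0; i + SAMPLING_READ_SIZE <= sizeof(data); i += SAMPLING_INTERVAL) {
		memcpy(&sample[s], &data[i], SAMPLING_READ_SIZE);
		s += SAMPLING_READ_SIZE;
	}

	/* One bucket per byte value, as in struct heuristic_ctx. */
	for (i = 0; i < s; i++)
		bucket[sample[i]]++;

	printf("sample size: %zu, entropy: %u%%\n", s, shannon_entropy_pct(bucket, s));
	return 0;
}

Low percentages mean few bits are needed per byte and compression is worth attempting; values at or above ENTROPY_LEVEL_HIGH suggest skipping it.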
diff --git a/fs/smb/client/splice.c b/fs/smb/client/splice.c
new file mode 100644
index 000000000000..1125fe2868d5
--- /dev/null
+++ b/fs/smb/client/splice.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) SUSE 2024
+ * Author(s): Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Splice support for cifs.ko
+ */
+#include <linux/splice.h>
+
+#include "splice.h"
+
+ssize_t cifs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags)
+{
+ pr_err("%s: start pos %lld, len %zu, flags 0x%x\n", __func__, ppos ? *ppos : 0, len, flags);
+ /* For now just delegate to the generic pagecache helper. */
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+#if 0
+ struct folio_batch fbatch;
+ struct kiocb iocb;
+ size_t total_spliced = 0, used, npages;
+ loff_t isize, end_offset;
+ bool writably_mapped;
+ int i, error = 0;
+
+ if (unlikely(*ppos >= in->f_mapping->host->i_sb->s_maxbytes))
+ return 0;
+
+ init_sync_kiocb(&iocb, in);
+ iocb.ki_pos = *ppos;
+
+ /* Work out how much data we can actually add into the pipe */
+ used = pipe_occupancy(pipe->head, pipe->tail);
+ npages = max_t(ssize_t, pipe->max_usage - used, 0);
+ len = min_t(size_t, len, npages * PAGE_SIZE);
+
+ folio_batch_init(&fbatch);
+
+ do {
+ cond_resched();
+
+ if (*ppos >= i_size_read(in->f_mapping->host))
+ break;
+
+ iocb.ki_pos = *ppos;
+ error = filemap_get_pages(&iocb, len, &fbatch, true);
+ if (error < 0)
+ break;
+
+ /*
+ * i_size must be checked after we know the pages are Uptodate.
+ *
+ * Checking i_size after the check allows us to calculate
+ * the correct value for "nr", which means the zero-filled
+ * part of the page is not copied back to userspace (unless
+ * another truncate extends the file - this is desired though).
+ */
+ isize = i_size_read(in->f_mapping->host);
+ if (unlikely(*ppos >= isize))
+ break;
+ end_offset = min_t(loff_t, isize, *ppos + len);
+
+ /*
+ * Once we start copying data, we don't want to be touching any
+ * cachelines that might be contended:
+ */
+ writably_mapped = mapping_writably_mapped(in->f_mapping);
+
+ for (i = 0; i < folio_batch_count(&fbatch); i++) {
+ struct folio *folio = fbatch.folios[i];
+ size_t n;
+
+ if (folio_pos(folio) >= end_offset)
+ goto out;
+ folio_mark_accessed(folio);
+
+ /*
+ * If users can be writing to this folio using arbitrary
+ * virtual addresses, take care of potential aliasing
+ * before reading the folio on the kernel side.
+ */
+ if (writably_mapped)
+ flush_dcache_folio(folio);
+
+ n = min_t(loff_t, len, isize - *ppos);
+ n = splice_folio_into_pipe(pipe, folio, *ppos, n);
+ if (!n)
+ goto out;
+ len -= n;
+ total_spliced += n;
+ *ppos += n;
+ in->f_ra.prev_pos = *ppos;
+ if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+ goto out;
+ }
+
+ folio_batch_release(&fbatch);
+ } while (len);
+
+out:
+ folio_batch_release(&fbatch);
+ file_accessed(in);
+
+ return total_spliced ? total_spliced : error;
+#endif
+}
+
+ssize_t
+cifs_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ pr_err("%s: start fname %s, pos %lld, len %zu, flags 0x%x\n", __func__, out->f_path.dentry->d_name.name, ppos ? *ppos : 0, len, flags);
+ /* For now just delegate to the generic ->write_iter-based helper. */
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
+#if 0
+ struct splice_desc sd = {
+ .total_len = len,
+ .flags = flags,
+ .pos = *ppos,
+ .u.file = out,
+ };
+ int nbufs = pipe->max_usage;
+ struct bio_vec *array;
+ ssize_t ret;
+
+ if (!out->f_op->write_iter)
+ return -EINVAL;
+
+ array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL);
+ if (unlikely(!array))
+ return -ENOMEM;
+
+ pipe_lock(pipe);
+
+ splice_from_pipe_begin(&sd);
+ while (sd.total_len) {
+ struct kiocb kiocb;
+ struct iov_iter from;
+ unsigned int head, tail, mask;
+ size_t left;
+ int n;
+
+ ret = splice_from_pipe_next(pipe, &sd);
+ if (ret <= 0)
+ break;
+
+ if (unlikely(nbufs < pipe->max_usage)) {
+ kfree(array);
+ nbufs = pipe->max_usage;
+ array = kcalloc(nbufs, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ if (!array) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ head = pipe->head;
+ tail = pipe->tail;
+ mask = pipe->ring_size - 1;
+
+ /* build the vector */
+ left = sd.total_len;
+ for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+ size_t this_len = buf->len;
+
+ /* zero-length bvecs are not supported, skip them */
+ if (!this_len)
+ continue;
+ this_len = min(this_len, left);
+
+ ret = pipe_buf_confirm(pipe, buf);
+ if (unlikely(ret)) {
+ if (ret == -ENODATA)
+ ret = 0;
+ goto done;
+ }
+
+ bvec_set_page(&array[n], buf->page, this_len,
+ buf->offset);
+ left -= this_len;
+ n++;
+ }
+
+ iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
+ init_sync_kiocb(&kiocb, out);
+ kiocb.ki_pos = sd.pos;
+ ret = call_write_iter(out, &kiocb, &from);
+ sd.pos = kiocb.ki_pos;
+ if (ret <= 0)
+ break;
+
+ sd.num_spliced += ret;
+ sd.total_len -= ret;
+ *ppos = sd.pos;
+
+ /* dismiss the fully eaten buffers, adjust the partial one */
+ tail = pipe->tail;
+ while (ret) {
+ struct pipe_buffer *buf = &pipe->bufs[tail & mask];
+ if (ret >= buf->len) {
+ ret -= buf->len;
+ buf->len = 0;
+ pipe_buf_release(pipe, buf);
+ tail++;
+ pipe->tail = tail;
+ if (pipe->files)
+ sd.need_wakeup = true;
+ } else {
+ buf->offset += ret;
+ buf->len -= ret;
+ ret = 0;
+ }
+ }
+ }
+done:
+ kfree(array);
+ splice_from_pipe_end(pipe, &sd);
+
+ pipe_unlock(pipe);
+
+ if (sd.num_spliced)
+ ret = sd.num_spliced;
+
+ return ret;
+#endif
+}
+
diff --git a/fs/smb/client/splice.h b/fs/smb/client/splice.h
new file mode 100644
index 000000000000..6c88402bb489
--- /dev/null
+++ b/fs/smb/client/splice.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) SUSE 2024
+ * Author(s): Enzo Matsumiya <ematsumiya@suse.de>
+ *
+ * Splice support for cifs.ko
+ */
+#ifndef _CIFS_SPLICE_H
+#define _CIFS_SPLICE_H
+
+#include <linux/pagevec.h>
+#include <linux/fs.h>
+#include <linux/pipe_fs_i.h>
+
+/**
+ * cifs_splice_read - Splice data from a file's pagecache into a pipe
+ * @in: The file to read from
+ * @ppos: Pointer to the file position to read from
+ * @pipe: The pipe to splice into
+ * @len: The amount to splice
+ * @flags: The SPLICE_F_* flags
+ *
+ * This function gets folios from the file's pagecache and splices them into
+ * the pipe. Readahead will be called as necessary to fill more folios.
+ *
+ * Return: On success, the number of bytes read will be returned and *@ppos
+ * will be updated if appropriate; 0 will be returned if there is no more data
+ * to be read; -EAGAIN will be returned if the pipe had no space, and some
+ * other negative error code will be returned on error. A short read may occur
+ * if the pipe has insufficient space, we reach the end of the data or we hit a
+ * hole.
+ */
+ssize_t cifs_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
+ size_t len, unsigned int flags);
+
+/**
+ * cifs_splice_write - splice data from a pipe to a file
+ * @pipe: pipe info
+ * @out: file to write to
+ * @ppos: position in @out
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * Description:
+ * Will either move or copy pages (determined by @flags options) from
+ * the given pipe inode to the given file.
+ * This one is ->write_iter-based.
+ *
+ */
+ssize_t cifs_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos,
+ size_t len, unsigned int flags);
+#endif /* !_CIFS_SPLICE_H */
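
As a usage note for the return contract documented for cifs_splice_read() above (positive and possibly short counts, 0 at EOF, -EAGAIN when the pipe has no room): a caller typically drains the pipe and retries on EAGAIN. Below is an illustrative non-blocking user-space loop; the paths are placeholders and the whole program is an assumption about usage, not part of the patch.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Move whatever is currently queued in the pipe into the sink fd. */
static int drain(int pipe_rd, int sink)
{
	ssize_t n;

	while ((n = splice(pipe_rd, NULL, sink, NULL, 1 << 16, SPLICE_F_NONBLOCK)) > 0)
		;
	return (n < 0 && errno != EAGAIN) ? -1 : 0;
}

int main(void)
{
	/* Placeholder paths; the source file lives on a CIFS mount. */
	int fd = open("/mnt/cifs/log.txt", O_RDONLY);
	int sink = open("/dev/null", O_WRONLY);
	int pfd[2];
	ssize_t n;

	if (fd < 0 || sink < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	for (;;) {
		n = splice(fd, NULL, pfd[1], NULL, 1 << 16, SPLICE_F_NONBLOCK);
		if (n == 0)				/* EOF */
			break;
		if (n < 0 && errno == EAGAIN) {		/* pipe full: make room, retry */
			if (drain(pfd[0], sink) < 0)
				break;
			continue;
		}
		if (n < 0) {
			perror("splice in");
			break;
		}
		/* n may be a short count; just drain and keep going. */
		if (drain(pfd[0], sink) < 0)
			break;
	}
	return 0;
}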