author		David Howells <dhowells@redhat.com>	2022-01-24 21:13:24 +0000
committer	Steve French <stfrench@microsoft.com>	2023-02-20 18:36:02 -0600
commit		d08089f649a0cfb2099c8551ac47eef0cc23fdf2 (patch)
tree		1165d72f35311d1017bf84c417cee6c37b3c01d9 /fs/cifs
parent		16541195c6d9bcad568b7c6afbf855ddc3a856aa (diff)
download	linux-d08089f649a0cfb2099c8551ac47eef0cc23fdf2.tar.gz
		linux-d08089f649a0cfb2099c8551ac47eef0cc23fdf2.tar.bz2
		linux-d08089f649a0cfb2099c8551ac47eef0cc23fdf2.zip
cifs: Change the I/O paths to use an iterator rather than a page list
Currently, the cifs I/O paths hand lists of pages from the VM interface routines at the top all the way through the intervening layers to the socket interface at the bottom.

This is a problem, however, for interfacing with netfslib, which passes an iterator through to the ->issue_read() method (and will pass an iterator through to the ->issue_write() method in future). Netfslib takes over bounce buffering for direct I/O, async I/O and encrypted content, so cifs doesn't need to do that. Netfslib also converts IOVEC-type iterators into BVEC-type iterators if necessary.

Further, cifs needs foliating - and folios may come in a variety of sizes, so a page list pointing to an array of heterogeneous pages may cause problems in places such as where crypto is done.

Change the cifs I/O paths to hand iov_iter iterators all the way through instead (a brief, hypothetical sketch of the iterator usage is included after the tag block below).

Notes:

 (1) Some old routines are #if'd out, to be removed in a follow-up patch, so as to keep this diff easier to follow. I've removed functions that don't overlap with anything added.

 (2) struct smb_rqst loses rq_pages, rq_offset, rq_npages, rq_pagesz and rq_tailsz, which described the pages forming the buffer; instead there's an rq_iter describing the source buffer and an rq_buffer which is used to hold the buffer for encryption.

 (3) struct cifs_readdata and struct cifs_writedata are modified similarly to struct smb_rqst. The ->read_into_pages() and ->copy_into_pages() methods are replaced by passing the iterator directly to the socket. The iterators are stored in these structs so that they are persistent and don't get deallocated when the function returns (unlike if they were stack variables).

 (4) Buffered writeback is overhauled, borrowing the code from the afs filesystem to gather up contiguous runs of folios. An XARRAY-type iterator is then used to refer directly to the pagecache and can be passed to the socket to transmit data directly from there. This includes:

	cifs_extend_writeback()
	cifs_write_back_from_locked_folio()
	cifs_writepages_region()
	cifs_writepages()

 (5) Pages are converted to folios.

 (6) Direct I/O uses netfs_extract_user_iter() to create a BVEC-type iterator from an IOVEC/UBUF-type source iterator.

 (7) smb2_get_aead_req() uses netfs_extract_iter_to_sg() to extract page fragments from the iterator into the scatterlists that the crypto layer prefers.

 (8) smb2_init_transform_rq() attaches pages to smb_rqst::rq_buffer, an xarray, to use as a bounce buffer for encryption. An XARRAY-type iterator can then be used to pass the bounce buffer to lower layers.
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Paulo Alcantara <pc@cjr.nz>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
Link: https://lore.kernel.org/r/164311907995.2806745.400147335497304099.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/164928620163.457102.11602306234438271112.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165211420279.3154751.15923591172438186144.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165348880385.2106726.3220789453472800240.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/165364827111.3334034.934805882842932881.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/166126396180.708021.271013668175370826.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/166697259595.61150.5982032408321852414.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732031756.3186319.12528413619888902872.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: Steve French <stfrench@microsoft.com>
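
For readers unfamiliar with the iov_iter pattern the patch adopts, here is a minimal, hypothetical sketch (not part of the patch) of the two operations the new paths rely on: describing a span of pagecache with an ITER_XARRAY iterator, as the new cifs_write_back_from_locked_folio() does before calling ->async_writev(), and carving a sub-range out of an existing iterator, as cifs_writev_requeue() does when splitting a failed write into smaller chunks. The helper names demo_fill_rqst() and demo_subrange() are illustrative only and assume <linux/uio.h>, <linux/pagemap.h> and the cifs definition of struct smb_rqst are in scope.

	/* Illustration only: point rq_iter at the pagecache span
	 * [start, start + len) as a data source.
	 */
	static void demo_fill_rqst(struct smb_rqst *rqst,
				   struct address_space *mapping,
				   loff_t start, size_t len)
	{
		iov_iter_xarray(&rqst->rq_iter, ITER_SOURCE,
				&mapping->i_pages, start, len);
		rqst->rq_iter_size = iov_iter_count(&rqst->rq_iter);
	}

	/* Illustration only: take a sub-range of an existing source iterator.
	 * The iov_iter is copied by value, so advancing and truncating the
	 * copy leaves the original untouched.
	 */
	static void demo_subrange(struct iov_iter *dst,
				  const struct iov_iter *src,
				  size_t offset, size_t len)
	{
		*dst = *src;
		iov_iter_advance(dst, offset);
		iov_iter_truncate(dst, len);
	}

Because the resulting iov_iter lives in the request/wdata structure rather than on the stack, it stays valid for as long as the async request is in flight, which is the point made in note (3) above.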
Diffstat (limited to 'fs/cifs')
-rw-r--r--	fs/cifs/Kconfig		1
-rw-r--r--	fs/cifs/cifsencrypt.c	28
-rw-r--r--	fs/cifs/cifsglob.h	66
-rw-r--r--	fs/cifs/cifsproto.h	8
-rw-r--r--	fs/cifs/cifssmb.c	15
-rw-r--r--	fs/cifs/file.c		1195
-rw-r--r--	fs/cifs/fscache.c	22
-rw-r--r--	fs/cifs/fscache.h	10
-rw-r--r--	fs/cifs/misc.c		128
-rw-r--r--	fs/cifs/smb2ops.c	378
-rw-r--r--	fs/cifs/smb2pdu.c	53
-rw-r--r--	fs/cifs/smbdirect.c	262
-rw-r--r--	fs/cifs/smbdirect.h	4
-rw-r--r--	fs/cifs/transport.c	54
14 files changed, 1133 insertions, 1091 deletions
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index bbf63a9eb927..4c0d53bf931a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -18,6 +18,7 @@ config CIFS
select DNS_RESOLVER
select ASN1
select OID_REGISTRY
+ select NETFS_SUPPORT
help
This is the client VFS module for the SMB3 family of network file
protocols (including the most recent, most secure dialect SMB3.1.1).
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7be589aeb520..357bd27a7fd1 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -169,11 +169,11 @@ static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize,
}
int __cifs_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server, char *signature,
- struct shash_desc *shash)
+ struct TCP_Server_Info *server, char *signature,
+ struct shash_desc *shash)
{
int i;
- int rc;
+ ssize_t rc;
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
@@ -205,25 +205,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
}
}
- /* now hash over the rq_pages array */
- for (i = 0; i < rqst->rq_npages; i++) {
- void *kaddr;
- unsigned int len, offset;
-
- rqst_page_get_length(rqst, i, &len, &offset);
-
- kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
-
- rc = crypto_shash_update(shash, kaddr, len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with payload\n",
- __func__);
- kunmap(rqst->rq_pages[i]);
- return rc;
- }
-
- kunmap(rqst->rq_pages[i]);
- }
+ rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash);
+ if (rc < 0)
+ return rc;
rc = crypto_shash_final(shash, signature);
if (rc)
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 00ee5e7f79c6..66d107cf1064 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -212,11 +212,9 @@ static inline void cifs_free_open_info(struct cifs_open_info_data *data)
struct smb_rqst {
struct kvec *rq_iov; /* array of kvecs */
unsigned int rq_nvec; /* number of kvecs in array */
- struct page **rq_pages; /* pointer to array of page ptrs */
- unsigned int rq_offset; /* the offset to the 1st page */
- unsigned int rq_npages; /* number pages in array */
- unsigned int rq_pagesz; /* page size to use */
- unsigned int rq_tailsz; /* length of last page */
+ size_t rq_iter_size; /* Amount of data in ->rq_iter */
+ struct iov_iter rq_iter; /* Data iterator */
+ struct xarray rq_buffer; /* Page buffer for encryption */
};
struct mid_q_entry;
@@ -1421,10 +1419,11 @@ struct cifs_aio_ctx {
struct cifsFileInfo *cfile;
struct bio_vec *bv;
loff_t pos;
- unsigned int npages;
+ unsigned int nr_pinned_pages;
ssize_t rc;
unsigned int len;
unsigned int total_len;
+ unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */
bool should_dirty;
/*
* Indicates if this aio_ctx is for direct_io,
@@ -1442,28 +1441,18 @@ struct cifs_readdata {
struct address_space *mapping;
struct cifs_aio_ctx *ctx;
__u64 offset;
+ ssize_t got_bytes;
unsigned int bytes;
- unsigned int got_bytes;
pid_t pid;
int result;
struct work_struct work;
- int (*read_into_pages)(struct TCP_Server_Info *server,
- struct cifs_readdata *rdata,
- unsigned int len);
- int (*copy_into_pages)(struct TCP_Server_Info *server,
- struct cifs_readdata *rdata,
- struct iov_iter *iter);
+ struct iov_iter iter;
struct kvec iov[2];
struct TCP_Server_Info *server;
#ifdef CONFIG_CIFS_SMB_DIRECT
struct smbd_mr *mr;
#endif
- unsigned int pagesz;
- unsigned int page_offset;
- unsigned int tailsz;
struct cifs_credits credits;
- unsigned int nr_pages;
- struct page **pages;
};
/* asynchronous write support */
@@ -1475,6 +1464,8 @@ struct cifs_writedata {
struct work_struct work;
struct cifsFileInfo *cfile;
struct cifs_aio_ctx *ctx;
+ struct iov_iter iter;
+ struct bio_vec *bv;
__u64 offset;
pid_t pid;
unsigned int bytes;
@@ -1483,12 +1474,7 @@ struct cifs_writedata {
#ifdef CONFIG_CIFS_SMB_DIRECT
struct smbd_mr *mr;
#endif
- unsigned int pagesz;
- unsigned int page_offset;
- unsigned int tailsz;
struct cifs_credits credits;
- unsigned int nr_pages;
- struct page **pages;
};
/*
@@ -2148,9 +2134,9 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const
dst->FileNameLength = src->FileNameLength;
}
-static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
- int num_rqst,
- const u8 *sig)
+static inline int cifs_get_num_sgs(const struct smb_rqst *rqst,
+ int num_rqst,
+ const u8 *sig)
{
unsigned int len, skip;
unsigned int nents = 0;
@@ -2170,6 +2156,19 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
* rqst[1+].rq_iov[0+] data to be encrypted/decrypted
*/
for (i = 0; i < num_rqst; i++) {
+ /* We really don't want a mixture of pinned and unpinned pages
+ * in the sglist. It's hard to keep track of which is what.
+ * Instead, we convert to a BVEC-type iterator higher up.
+ */
+ if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter)))
+ return -EIO;
+
+ /* We also don't want to have any extra refs or pins to clean
+ * up in the sglist.
+ */
+ if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter)))
+ return -EIO;
+
for (j = 0; j < rqst[i].rq_nvec; j++) {
struct kvec *iov = &rqst[i].rq_iov[j];
@@ -2183,7 +2182,7 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
}
skip = 0;
}
- nents += rqst[i].rq_npages;
+ nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX);
}
nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
return nents;
@@ -2192,9 +2191,9 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
/* We can not use the normal sg_set_buf() as we will sometimes pass a
* stack object as buf.
*/
-static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
- const void *buf,
- unsigned int buflen)
+static inline void cifs_sg_set_buf(struct sg_table *sgtable,
+ const void *buf,
+ unsigned int buflen)
{
unsigned long addr = (unsigned long)buf;
unsigned int off = offset_in_page(addr);
@@ -2204,16 +2203,17 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
do {
unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);
- sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off);
+ sg_set_page(&sgtable->sgl[sgtable->nents++],
+ vmalloc_to_page((void *)addr), len, off);
off = 0;
addr += PAGE_SIZE;
buflen -= len;
} while (buflen);
} else {
- sg_set_page(sg++, virt_to_page(addr), buflen, off);
+ sg_set_page(&sgtable->sgl[sgtable->nents++],
+ virt_to_page(addr), buflen, off);
}
- return sg;
}
#endif /* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index cb7a3fe89278..2873f68a051c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -584,10 +584,7 @@ int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
int cifs_async_writev(struct cifs_writedata *wdata,
void (*release)(struct kref *kref));
void cifs_writev_complete(struct work_struct *work);
-struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
- work_func_t complete);
-struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
- work_func_t complete);
+struct cifs_writedata *cifs_writedata_alloc(work_func_t complete);
void cifs_writedata_release(struct kref *refcount);
int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
@@ -604,13 +601,10 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
void cifs_aio_ctx_release(struct kref *refcount);
-int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
void cifs_free_hash(struct shash_desc **sdesc);
-void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
- unsigned int *len, unsigned int *offset);
struct cifs_chan *
cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index c32bfe68212b..9693ef196e54 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -24,6 +24,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/uaccess.h>
#include "cifspdu.h"
+#include "cifsfs.h"
#include "cifsglob.h"
#include "cifsacl.h"
#include "cifsproto.h"
@@ -1294,11 +1295,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
struct TCP_Server_Info *server = tcon->ses->server;
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
- .rq_pages = rdata->pages,
- .rq_offset = rdata->page_offset,
- .rq_npages = rdata->nr_pages,
- .rq_pagesz = rdata->pagesz,
- .rq_tailsz = rdata->tailsz };
+ .rq_iter_size = iov_iter_count(&rdata->iter),
+ .rq_iter = rdata->iter };
struct cifs_credits credits = { .value = 1, .instance = 0 };
cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
@@ -1737,11 +1735,8 @@ cifs_async_writev(struct cifs_writedata *wdata,
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
- rqst.rq_pages = wdata->pages;
- rqst.rq_offset = wdata->page_offset;
- rqst.rq_npages = wdata->nr_pages;
- rqst.rq_pagesz = wdata->pagesz;
- rqst.rq_tailsz = wdata->tailsz;
+ rqst.rq_iter = wdata->iter;
+ rqst.rq_iter_size = iov_iter_count(&wdata->iter);
cifs_dbg(FYI, "async write at %llu %u bytes\n",
wdata->offset, wdata->bytes);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 09240b8b018a..599578f7e961 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -37,6 +37,32 @@
#include "cached_dir.h"
/*
+ * Remove the dirty flags from a span of pages.
+ */
+static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct folio *folio;
+ pgoff_t end;
+
+ XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
+
+ rcu_read_lock();
+
+ end = (start + len - 1) / PAGE_SIZE;
+ xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
+ xas_pause(&xas);
+ rcu_read_unlock();
+ folio_lock(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_unlock(folio);
+ rcu_read_lock();
+ }
+
+ rcu_read_unlock();
+}
+
+/*
* Completion of write to server.
*/
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
@@ -2391,7 +2417,6 @@ cifs_writedata_release(struct kref *refcount)
if (wdata->cfile)
cifsFileInfo_put(wdata->cfile);
- kvfree(wdata->pages);
kfree(wdata);
}
@@ -2402,51 +2427,49 @@ cifs_writedata_release(struct kref *refcount)
static void
cifs_writev_requeue(struct cifs_writedata *wdata)
{
- int i, rc = 0;
+ int rc = 0;
struct inode *inode = d_inode(wdata->cfile->dentry);
struct TCP_Server_Info *server;
- unsigned int rest_len;
+ unsigned int rest_len = wdata->bytes;
+ loff_t fpos = wdata->offset;
server = tlink_tcon(wdata->cfile->tlink)->ses->server;
- i = 0;
- rest_len = wdata->bytes;
do {
struct cifs_writedata *wdata2;
- unsigned int j, nr_pages, wsize, tailsz, cur_len;
+ unsigned int wsize, cur_len;
wsize = server->ops->wp_retry_size(inode);
if (wsize < rest_len) {
- nr_pages = wsize / PAGE_SIZE;
- if (!nr_pages) {
+ if (wsize < PAGE_SIZE) {
rc = -EOPNOTSUPP;
break;
}
- cur_len = nr_pages * PAGE_SIZE;
- tailsz = PAGE_SIZE;
+ cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
} else {
- nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
cur_len = rest_len;
- tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
}
- wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
+ wdata2 = cifs_writedata_alloc(cifs_writev_complete);
if (!wdata2) {
rc = -ENOMEM;
break;
}
- for (j = 0; j < nr_pages; j++) {
- wdata2->pages[j] = wdata->pages[i + j];
- lock_page(wdata2->pages[j]);
- clear_page_dirty_for_io(wdata2->pages[j]);
- }
-
wdata2->sync_mode = wdata->sync_mode;
- wdata2->nr_pages = nr_pages;
- wdata2->offset = page_offset(wdata2->pages[0]);
- wdata2->pagesz = PAGE_SIZE;
- wdata2->tailsz = tailsz;
- wdata2->bytes = cur_len;
+ wdata2->offset = fpos;
+ wdata2->bytes = cur_len;
+ wdata2->iter = wdata->iter;
+
+ iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
+ iov_iter_truncate(&wdata2->iter, wdata2->bytes);
+
+ if (iov_iter_is_xarray(&wdata2->iter))
+ /* Check for pages having been redirtied and clean
+ * them. We can do this by walking the xarray. If
+ * it's not an xarray, then it's a DIO and we shouldn't
+ * be mucking around with the page bits.
+ */
+ cifs_undirty_folios(inode, fpos, cur_len);
rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
&wdata2->cfile);
@@ -2461,33 +2484,22 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
cifs_writedata_release);
}
- for (j = 0; j < nr_pages; j++) {
- unlock_page(wdata2->pages[j]);
- if (rc != 0 && !is_retryable_error(rc)) {
- SetPageError(wdata2->pages[j]);
- end_page_writeback(wdata2->pages[j]);
- put_page(wdata2->pages[j]);
- }
- }
-
kref_put(&wdata2->refcount, cifs_writedata_release);
if (rc) {
if (is_retryable_error(rc))
continue;
- i += nr_pages;
+ fpos += cur_len;
+ rest_len -= cur_len;
break;
}
+ fpos += cur_len;
rest_len -= cur_len;
- i += nr_pages;
- } while (i < wdata->nr_pages);
+ } while (rest_len > 0);
- /* cleanup remaining pages from the original wdata */
- for (; i < wdata->nr_pages; i++) {
- SetPageError(wdata->pages[i]);
- end_page_writeback(wdata->pages[i]);
- put_page(wdata->pages[i]);
- }
+ /* Clean up remaining pages from the original wdata */
+ if (iov_iter_is_xarray(&wdata->iter))
+ cifs_pages_write_failed(inode, fpos, rest_len);
if (rc != 0 && !is_retryable_error(rc))
mapping_set_error(inode->i_mapping, rc);
@@ -2500,7 +2512,6 @@ cifs_writev_complete(struct work_struct *work)
struct cifs_writedata *wdata = container_of(work,
struct cifs_writedata, work);
struct inode *inode = d_inode(wdata->cfile->dentry);
- int i = 0;
if (wdata->result == 0) {
spin_lock(&inode->i_lock);
@@ -2511,45 +2522,24 @@ cifs_writev_complete(struct work_struct *work)
} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
return cifs_writev_requeue(wdata);
- for (i = 0; i < wdata->nr_pages; i++) {
- struct page *page = wdata->pages[i];
+ if (wdata->result == -EAGAIN)
+ cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
+ else if (wdata->result < 0)
+ cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
+ else
+ cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
- if (wdata->result == -EAGAIN)
- __set_page_dirty_nobuffers(page);
- else if (wdata->result < 0)
- SetPageError(page);
- end_page_writeback(page);
- cifs_readpage_to_fscache(inode, page);
- put_page(page);
- }
if (wdata->result != -EAGAIN)
mapping_set_error(inode->i_mapping, wdata->result);
kref_put(&wdata->refcount, cifs_writedata_release);
}
-struct cifs_writedata *
-cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
-{
- struct cifs_writedata *writedata = NULL;
- struct page **pages =
- kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (pages) {
- writedata = cifs_writedata_direct_alloc(pages, complete);
- if (!writedata)
- kvfree(pages);
- }
-
- return writedata;
-}
-
-struct cifs_writedata *
-cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
+struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
{
struct cifs_writedata *wdata;
wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
if (wdata != NULL) {
- wdata->pages = pages;
kref_init(&wdata->refcount);
INIT_LIST_HEAD(&wdata->list);
init_completion(&wdata->done);
@@ -2558,7 +2548,6 @@ cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
return wdata;
}
-
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
struct address_space *mapping = page->mapping;
@@ -2617,6 +2606,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
return rc;
}
+#if 0 // TODO: Remove for iov_iter support
static struct cifs_writedata *
wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
pgoff_t end, pgoff_t *index,
@@ -2922,6 +2912,375 @@ retry:
set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
return rc;
}
+#endif
+
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ */
+static void cifs_extend_writeback(struct address_space *mapping,
+ long *_count,
+ loff_t start,
+ int max_pages,
+ size_t max_len,
+ unsigned int *_len)
+{
+ struct folio_batch batch;
+ struct folio *folio;
+ unsigned int psize, nr_pages;
+ size_t len = *_len;
+ pgoff_t index = (start + len) / PAGE_SIZE;
+ bool stop = true;
+ unsigned int i;
+ XA_STATE(xas, &mapping->i_pages, index);
+
+ folio_batch_init(&batch);
+
+ do {
+ /* Firstly, we gather up a batch of contiguous dirty pages
+ * under the RCU read lock - but we can't clear the dirty flags
+ * there if any of those pages are mapped.
+ */
+ rcu_read_lock();
+
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ stop = true;
+ if (xas_retry(&xas, folio))
+ continue;
+ if (xa_is_value(folio))
+ break;
+ if (folio_index(folio) != index)
+ break;
+ if (!folio_try_get_rcu(folio)) {
+ xas_reset(&xas);
+ continue;
+ }
+ nr_pages = folio_nr_pages(folio);
+ if (nr_pages > max_pages)
+ break;
+
+ /* Has the page moved or been split? */
+ if (unlikely(folio != xas_reload(&xas))) {
+ folio_put(folio);
+ break;
+ }
+
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
+ break;
+ }
+ if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ break;
+ }
+
+ max_pages -= nr_pages;
+ psize = folio_size(folio);
+ len += psize;
+ stop = false;
+ if (max_pages <= 0 || len >= max_len || *_count <= 0)
+ stop = true;
+
+ index += nr_pages;
+ if (!folio_batch_add(&batch, folio))
+ break;
+ if (stop)
+ break;
+ }
+
+ if (!stop)
+ xas_pause(&xas);
+ rcu_read_unlock();
+
+ /* Now, if we obtained any pages, we can shift them to being
+ * writable and mark them for caching.
+ */
+ if (!folio_batch_count(&batch))
+ break;
+
+ for (i = 0; i < folio_batch_count(&batch); i++) {
+ folio = batch.folios[i];
+ /* The folio should be locked, dirty and not undergoing
+ * writeback from the loop above.
+ */
+ if (!folio_clear_dirty_for_io(folio))
+ WARN_ON(1);
+ if (folio_start_writeback(folio))
+ WARN_ON(1);
+
+ *_count -= folio_nr_pages(folio);
+ folio_unlock(folio);
+ }
+
+ folio_batch_release(&batch);
+ cond_resched();
+ } while (!stop);
+
+ *_len = len;
+}
+
+/*
+ * Write back the locked page and any subsequent non-locked dirty pages.
+ */
+static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct folio *folio,
+ loff_t start, loff_t end)
+{
+ struct inode *inode = mapping->host;
+ struct TCP_Server_Info *server;
+ struct cifs_writedata *wdata;
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_credits credits_on_stack;
+ struct cifs_credits *credits = &credits_on_stack;
+ struct cifsFileInfo *cfile = NULL;
+ unsigned int xid, wsize, len;
+ loff_t i_size = i_size_read(inode);
+ size_t max_len;
+ long count = wbc->nr_to_write;
+ int rc;
+
+ /* The folio should be locked, dirty and not undergoing writeback. */
+ if (folio_start_writeback(folio))
+ WARN_ON(1);
+
+ count -= folio_nr_pages(folio);
+ len = folio_size(folio);
+
+ xid = get_xid();
+ server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
+
+ rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
+ if (rc) {
+ cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
+ goto err_xid;
+ }
+
+ rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
+ &wsize, credits);
+ if (rc != 0)
+ goto err_close;
+
+ wdata = cifs_writedata_alloc(cifs_writev_complete);
+ if (!wdata) {
+ rc = -ENOMEM;
+ goto err_uncredit;
+ }
+
+ wdata->sync_mode = wbc->sync_mode;
+ wdata->offset = folio_pos(folio);
+ wdata->pid = cfile->pid;
+ wdata->credits = credits_on_stack;
+ wdata->cfile = cfile;
+ wdata->server = server;
+ cfile = NULL;
+
+ /* Find all consecutive lockable dirty pages, stopping when we find a
+ * page that is not immediately lockable, is not dirty or is missing,
+ * or we reach the end of the range.
+ */
+ if (start < i_size) {
+ /* Trim the write to the EOF; the extra data is ignored. Also
+ * put an upper limit on the size of a single storedata op.
+ */
+ max_len = wsize;
+ max_len = min_t(unsigned long long, max_len, end - start + 1);
+ max_len = min_t(unsigned long long, max_len, i_size - start);
+
+ if (len < max_len) {
+ int max_pages = INT_MAX;
+
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->smbd_conn)
+ max_pages = server->smbd_conn->max_frmr_depth;
+#endif
+ max_pages -= folio_nr_pages(folio);
+
+ if (max_pages > 0)
+ cifs_extend_writeback(mapping, &count, start,
+ max_pages, max_len, &len);
+ }
+ len = min_t(loff_t, len, max_len);
+ }
+
+ wdata->bytes = len;
+
+ /* We now have a contiguous set of dirty pages, each with writeback
+ * set; the first page is still locked at this point, but all the rest
+ * have been unlocked.
+ */
+ folio_unlock(folio);
+
+ if (start < i_size) {
+ iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
+ start, len);
+
+ rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
+ if (rc)
+ goto err_wdata;
+
+ if (wdata->cfile->invalidHandle)
+ rc = -EAGAIN;
+ else
+ rc = wdata->server->ops->async_writev(wdata,
+ cifs_writedata_release);
+ if (rc >= 0) {
+ kref_put(&wdata->refcount, cifs_writedata_release);
+ goto err_close;
+ }
+ } else {
+ /* The dirty region was entirely beyond the EOF. */
+ cifs_pages_written_back(inode, start, len);
+ rc = 0;
+ }
+
+err_wdata:
+ kref_put(&wdata->refcount, cifs_writedata_release);
+err_uncredit:
+ add_credits_and_wake_if(server, credits, 0);
+err_close:
+ if (cfile)
+ cifsFileInfo_put(cfile);
+err_xid:
+ free_xid(xid);
+ if (rc == 0) {
+ wbc->nr_to_write = count;
+ } else if (is_retryable_error(rc)) {
+ cifs_pages_write_redirty(inode, start, len);
+ } else {
+ cifs_pages_write_failed(inode, start, len);
+ mapping_set_error(mapping, rc);
+ }
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+ return rc;
+}
+
+/*
+ * write a region of pages back to the server
+ */
+static int cifs_writepages_region(struct address_space *mapping,
+ struct writeback_control *wbc,
+ loff_t start, loff_t end, loff_t *_next)
+{
+ struct folio *folio;
+ struct page *head_page;
+ ssize_t ret;
+ int n, skips = 0;
+
+ do {
+ pgoff_t index = start / PAGE_SIZE;
+
+ n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
+ PAGECACHE_TAG_DIRTY, 1, &head_page);
+ if (!n)
+ break;
+
+ folio = page_folio(head_page);
+ start = folio_pos(folio); /* May regress with THPs */
+
+ /* At this point we hold neither the i_pages lock nor the
+ * page lock: the page may be truncated or invalidated
+ * (changing page->mapping to NULL), or even swizzled
+ * back from swapper_space to tmpfs file mapping
+ */
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ ret = folio_lock_killable(folio);
+ if (ret < 0) {
+ folio_put(folio);
+ return ret;
+ }
+ } else {
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
+ return 0;
+ }
+ }
+
+ if (folio_mapping(folio) != mapping ||
+ !folio_test_dirty(folio)) {
+ start += folio_size(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ continue;
+ }
+
+ if (folio_test_writeback(folio) ||
+ folio_test_fscache(folio)) {
+ folio_unlock(folio);
+ if (wbc->sync_mode != WB_SYNC_NONE) {
+ folio_wait_writeback(folio);
+#ifdef CONFIG_CIFS_FSCACHE
+ folio_wait_fscache(folio);
+#endif
+ } else {
+ start += folio_size(folio);
+ }
+ folio_put(folio);
+ if (wbc->sync_mode == WB_SYNC_NONE) {
+ if (skips >= 5 || need_resched())
+ break;
+ skips++;
+ }
+ continue;
+ }
+
+ if (!folio_clear_dirty_for_io(folio))
+ /* We hold the page lock - it should've been dirty. */
+ WARN_ON(1);
+
+ ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+ folio_put(folio);
+ if (ret < 0)
+ return ret;
+
+ start += ret;
+ cond_resched();
+ } while (wbc->nr_to_write > 0);
+
+ *_next = start;
+ return 0;
+}
+
+/*
+ * Write some of the pending data back to the server
+ */
+static int cifs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ loff_t start, next;
+ int ret;
+
+ /* We have to be careful as we can end up racing with setattr()
+ * truncating the pagecache since the caller doesn't take a lock here
+ * to prevent it.
+ */
+
+ if (wbc->range_cyclic) {
+ start = mapping->writeback_index * PAGE_SIZE;
+ ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
+ if (ret == 0) {
+ mapping->writeback_index = next / PAGE_SIZE;
+ if (start > 0 && wbc->nr_to_write > 0) {
+ ret = cifs_writepages_region(mapping, wbc, 0,
+ start, &next);
+ if (ret == 0)
+ mapping->writeback_index =
+ next / PAGE_SIZE;
+ }
+ }
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
+ if (wbc->nr_to_write > 0 && ret == 0)
+ mapping->writeback_index = next / PAGE_SIZE;
+ } else {
+ ret = cifs_writepages_region(mapping, wbc,
+ wbc->range_start, wbc->range_end, &next);
+ }
+
+ return ret;
+}