diff options
Diffstat (limited to 'fs/cifs')
| -rw-r--r-- | fs/cifs/Kconfig | 1 | ||||
| -rw-r--r-- | fs/cifs/cifsencrypt.c | 28 | ||||
| -rw-r--r-- | fs/cifs/cifsglob.h | 66 | ||||
| -rw-r--r-- | fs/cifs/cifsproto.h | 8 | ||||
| -rw-r--r-- | fs/cifs/cifssmb.c | 15 | ||||
| -rw-r--r-- | fs/cifs/file.c | 1195 | ||||
| -rw-r--r-- | fs/cifs/fscache.c | 22 | ||||
| -rw-r--r-- | fs/cifs/fscache.h | 10 | ||||
| -rw-r--r-- | fs/cifs/misc.c | 128 | ||||
| -rw-r--r-- | fs/cifs/smb2ops.c | 378 | ||||
| -rw-r--r-- | fs/cifs/smb2pdu.c | 53 | ||||
| -rw-r--r-- | fs/cifs/smbdirect.c | 262 | ||||
| -rw-r--r-- | fs/cifs/smbdirect.h | 4 | ||||
| -rw-r--r-- | fs/cifs/transport.c | 54 |
14 files changed, 1133 insertions, 1091 deletions
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index bbf63a9eb927..4c0d53bf931a 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -18,6 +18,7 @@ config CIFS select DNS_RESOLVER select ASN1 select OID_REGISTRY + select NETFS_SUPPORT help This is the client VFS module for the SMB3 family of network file protocols (including the most recent, most secure dialect SMB3.1.1). diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 7be589aeb520..357bd27a7fd1 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -169,11 +169,11 @@ static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize, } int __cifs_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, char *signature, - struct shash_desc *shash) + struct TCP_Server_Info *server, char *signature, + struct shash_desc *shash) { int i; - int rc; + ssize_t rc; struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; @@ -205,25 +205,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst, } } - /* now hash over the rq_pages array */ - for (i = 0; i < rqst->rq_npages; i++) { - void *kaddr; - unsigned int len, offset; - - rqst_page_get_length(rqst, i, &len, &offset); - - kaddr = (char *) kmap(rqst->rq_pages[i]) + offset; - - rc = crypto_shash_update(shash, kaddr, len); - if (rc) { - cifs_dbg(VFS, "%s: Could not update with payload\n", - __func__); - kunmap(rqst->rq_pages[i]); - return rc; - } - - kunmap(rqst->rq_pages[i]); - } + rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash); + if (rc < 0) + return rc; rc = crypto_shash_final(shash, signature); if (rc) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 00ee5e7f79c6..66d107cf1064 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -212,11 +212,9 @@ static inline void cifs_free_open_info(struct cifs_open_info_data *data) struct smb_rqst { struct kvec *rq_iov; /* array of kvecs */ unsigned int rq_nvec; /* number of kvecs in array */ - struct page **rq_pages; /* pointer to array of page ptrs */ - unsigned int rq_offset; /* the offset to the 1st page */ - unsigned int rq_npages; /* number pages in array */ - unsigned int rq_pagesz; /* page size to use */ - unsigned int rq_tailsz; /* length of last page */ + size_t rq_iter_size; /* Amount of data in ->rq_iter */ + struct iov_iter rq_iter; /* Data iterator */ + struct xarray rq_buffer; /* Page buffer for encryption */ }; struct mid_q_entry; @@ -1421,10 +1419,11 @@ struct cifs_aio_ctx { struct cifsFileInfo *cfile; struct bio_vec *bv; loff_t pos; - unsigned int npages; + unsigned int nr_pinned_pages; ssize_t rc; unsigned int len; unsigned int total_len; + unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ bool should_dirty; /* * Indicates if this aio_ctx is for direct_io, @@ -1442,28 +1441,18 @@ struct cifs_readdata { struct address_space *mapping; struct cifs_aio_ctx *ctx; __u64 offset; + ssize_t got_bytes; unsigned int bytes; - unsigned int got_bytes; pid_t pid; int result; struct work_struct work; - int (*read_into_pages)(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, - unsigned int len); - int (*copy_into_pages)(struct TCP_Server_Info *server, - struct cifs_readdata *rdata, - struct iov_iter *iter); + struct iov_iter iter; struct kvec iov[2]; struct TCP_Server_Info *server; #ifdef CONFIG_CIFS_SMB_DIRECT struct smbd_mr *mr; #endif - unsigned int pagesz; - unsigned int page_offset; - unsigned int tailsz; struct cifs_credits credits; - unsigned int nr_pages; - struct page **pages; }; /* asynchronous write support */ @@ -1475,6 +1464,8 @@ struct cifs_writedata { struct work_struct work; struct cifsFileInfo *cfile; struct cifs_aio_ctx *ctx; + struct iov_iter iter; + struct bio_vec *bv; __u64 offset; pid_t pid; unsigned int bytes; @@ -1483,12 +1474,7 @@ struct cifs_writedata { #ifdef CONFIG_CIFS_SMB_DIRECT struct smbd_mr *mr; #endif - unsigned int pagesz; - unsigned int page_offset; - unsigned int tailsz; struct cifs_credits credits; - unsigned int nr_pages; - struct page **pages; }; /* @@ -2148,9 +2134,9 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const dst->FileNameLength = src->FileNameLength; } -static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, - int num_rqst, - const u8 *sig) +static inline int cifs_get_num_sgs(const struct smb_rqst *rqst, + int num_rqst, + const u8 *sig) { unsigned int len, skip; unsigned int nents = 0; @@ -2170,6 +2156,19 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, * rqst[1+].rq_iov[0+] data to be encrypted/decrypted */ for (i = 0; i < num_rqst; i++) { + /* We really don't want a mixture of pinned and unpinned pages + * in the sglist. It's hard to keep track of which is what. + * Instead, we convert to a BVEC-type iterator higher up. + */ + if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter))) + return -EIO; + + /* We also don't want to have any extra refs or pins to clean + * up in the sglist. + */ + if (WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter))) + return -EIO; + for (j = 0; j < rqst[i].rq_nvec; j++) { struct kvec *iov = &rqst[i].rq_iov[j]; @@ -2183,7 +2182,7 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, } skip = 0; } - nents += rqst[i].rq_npages; + nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX); } nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE); return nents; @@ -2192,9 +2191,9 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst, /* We can not use the normal sg_set_buf() as we will sometimes pass a * stack object as buf. */ -static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, - const void *buf, - unsigned int buflen) +static inline void cifs_sg_set_buf(struct sg_table *sgtable, + const void *buf, + unsigned int buflen) { unsigned long addr = (unsigned long)buf; unsigned int off = offset_in_page(addr); @@ -2204,16 +2203,17 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, do { unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off); - sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off); + sg_set_page(&sgtable->sgl[sgtable->nents++], + vmalloc_to_page((void *)addr), len, off); off = 0; addr += PAGE_SIZE; buflen -= len; } while (buflen); } else { - sg_set_page(sg++, virt_to_page(addr), buflen, off); + sg_set_page(&sgtable->sgl[sgtable->nents++], + virt_to_page(addr), buflen, off); } - return sg; } #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index cb7a3fe89278..2873f68a051c 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -584,10 +584,7 @@ int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid); int cifs_async_writev(struct cifs_writedata *wdata, void (*release)(struct kref *kref)); void cifs_writev_complete(struct work_struct *work); -struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, - work_func_t complete); -struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages, - work_func_t complete); +struct cifs_writedata *cifs_writedata_alloc(work_func_t complete); void cifs_writedata_release(struct kref *refcount); int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, @@ -604,13 +601,10 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); void cifs_aio_ctx_release(struct kref *refcount); -int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); -void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page, - unsigned int *len, unsigned int *offset); struct cifs_chan * cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server); int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c32bfe68212b..9693ef196e54 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -24,6 +24,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/uaccess.h> #include "cifspdu.h" +#include "cifsfs.h" #include "cifsglob.h" #include "cifsacl.h" #include "cifsproto.h" @@ -1294,11 +1295,8 @@ cifs_readv_callback(struct mid_q_entry *mid) struct TCP_Server_Info *server = tcon->ses->server; struct smb_rqst rqst = { .rq_iov = rdata->iov, .rq_nvec = 2, - .rq_pages = rdata->pages, - .rq_offset = rdata->page_offset, - .rq_npages = rdata->nr_pages, - .rq_pagesz = rdata->pagesz, - .rq_tailsz = rdata->tailsz }; + .rq_iter_size = iov_iter_count(&rdata->iter), + .rq_iter = rdata->iter }; struct cifs_credits credits = { .value = 1, .instance = 0 }; cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", @@ -1737,11 +1735,8 @@ cifs_async_writev(struct cifs_writedata *wdata, rqst.rq_iov = iov; rqst.rq_nvec = 2; - rqst.rq_pages = wdata->pages; - rqst.rq_offset = wdata->page_offset; - rqst.rq_npages = wdata->nr_pages; - rqst.rq_pagesz = wdata->pagesz; - rqst.rq_tailsz = wdata->tailsz; + rqst.rq_iter = wdata->iter; + rqst.rq_iter_size = iov_iter_count(&wdata->iter); cifs_dbg(FYI, "async write at %llu %u bytes\n", wdata->offset, wdata->bytes); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 09240b8b018a..599578f7e961 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -37,6 +37,32 @@ #include "cached_dir.h" /* + * Remove the dirty flags from a span of pages. + */ +static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { + xas_pause(&xas); + rcu_read_unlock(); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_unlock(folio); + rcu_read_lock(); + } + + rcu_read_unlock(); +} + +/* * Completion of write to server. */ void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) @@ -2391,7 +2417,6 @@ cifs_writedata_release(struct kref *refcount) if (wdata->cfile) cifsFileInfo_put(wdata->cfile); - kvfree(wdata->pages); kfree(wdata); } @@ -2402,51 +2427,49 @@ cifs_writedata_release(struct kref *refcount) static void cifs_writev_requeue(struct cifs_writedata *wdata) { - int i, rc = 0; + int rc = 0; struct inode *inode = d_inode(wdata->cfile->dentry); struct TCP_Server_Info *server; - unsigned int rest_len; + unsigned int rest_len = wdata->bytes; + loff_t fpos = wdata->offset; server = tlink_tcon(wdata->cfile->tlink)->ses->server; - i = 0; - rest_len = wdata->bytes; do { struct cifs_writedata *wdata2; - unsigned int j, nr_pages, wsize, tailsz, cur_len; + unsigned int wsize, cur_len; wsize = server->ops->wp_retry_size(inode); if (wsize < rest_len) { - nr_pages = wsize / PAGE_SIZE; - if (!nr_pages) { + if (wsize < PAGE_SIZE) { rc = -EOPNOTSUPP; break; } - cur_len = nr_pages * PAGE_SIZE; - tailsz = PAGE_SIZE; + cur_len = min(round_down(wsize, PAGE_SIZE), rest_len); } else { - nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE); cur_len = rest_len; - tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE; } - wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete); + wdata2 = cifs_writedata_alloc(cifs_writev_complete); if (!wdata2) { rc = -ENOMEM; break; } - for (j = 0; j < nr_pages; j++) { - wdata2->pages[j] = wdata->pages[i + j]; - lock_page(wdata2->pages[j]); - clear_page_dirty_for_io(wdata2->pages[j]); - } - wdata2->sync_mode = wdata->sync_mode; - wdata2->nr_pages = nr_pages; - wdata2->offset = page_offset(wdata2->pages[0]); - wdata2->pagesz = PAGE_SIZE; - wdata2->tailsz = tailsz; - wdata2->bytes = cur_len; + wdata2->offset = fpos; + wdata2->bytes = cur_len; + wdata2->iter = wdata->iter; + + iov_iter_advance(&wdata2->iter, fpos - wdata->offset); + iov_iter_truncate(&wdata2->iter, wdata2->bytes); + + if (iov_iter_is_xarray(&wdata2->iter)) + /* Check for pages having been redirtied and clean + * them. We can do this by walking the xarray. If + * it's not an xarray, then it's a DIO and we shouldn't + * be mucking around with the page bits. + */ + cifs_undirty_folios(inode, fpos, cur_len); rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &wdata2->cfile); @@ -2461,33 +2484,22 @@ cifs_writev_requeue(struct cifs_writedata *wdata) cifs_writedata_release); } - for (j = 0; j < nr_pages; j++) { - unlock_page(wdata2->pages[j]); - if (rc != 0 && !is_retryable_error(rc)) { - SetPageError(wdata2->pages[j]); - end_page_writeback(wdata2->pages[j]); - put_page(wdata2->pages[j]); - } - } - kref_put(&wdata2->refcount, cifs_writedata_release); if (rc) { if (is_retryable_error(rc)) continue; - i += nr_pages; + fpos += cur_len; + rest_len -= cur_len; break; } + fpos += cur_len; rest_len -= cur_len; - i += nr_pages; - } while (i < wdata->nr_pages); + } while (rest_len > 0); - /* cleanup remaining pages from the original wdata */ - for (; i < wdata->nr_pages; i++) { - SetPageError(wdata->pages[i]); - end_page_writeback(wdata->pages[i]); - put_page(wdata->pages[i]); - } + /* Clean up remaining pages from the original wdata */ + if (iov_iter_is_xarray(&wdata->iter)) + cifs_pages_write_failed(inode, fpos, rest_len); if (rc != 0 && !is_retryable_error(rc)) mapping_set_error(inode->i_mapping, rc); @@ -2500,7 +2512,6 @@ cifs_writev_complete(struct work_struct *work) struct cifs_writedata *wdata = container_of(work, struct cifs_writedata, work); struct inode *inode = d_inode(wdata->cfile->dentry); - int i = 0; if (wdata->result == 0) { spin_lock(&inode->i_lock); @@ -2511,45 +2522,24 @@ cifs_writev_complete(struct work_struct *work) } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN) return cifs_writev_requeue(wdata); - for (i = 0; i < wdata->nr_pages; i++) { - struct page *page = wdata->pages[i]; + if (wdata->result == -EAGAIN) + cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes); + else if (wdata->result < 0) + cifs_pages_write_failed(inode, wdata->offset, wdata->bytes); + else + cifs_pages_written_back(inode, wdata->offset, wdata->bytes); - if (wdata->result == -EAGAIN) - __set_page_dirty_nobuffers(page); - else if (wdata->result < 0) - SetPageError(page); - end_page_writeback(page); - cifs_readpage_to_fscache(inode, page); - put_page(page); - } if (wdata->result != -EAGAIN) mapping_set_error(inode->i_mapping, wdata->result); kref_put(&wdata->refcount, cifs_writedata_release); } -struct cifs_writedata * -cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete) -{ - struct cifs_writedata *writedata = NULL; - struct page **pages = - kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); - if (pages) { - writedata = cifs_writedata_direct_alloc(pages, complete); - if (!writedata) - kvfree(pages); - } - - return writedata; -} - -struct cifs_writedata * -cifs_writedata_direct_alloc(struct page **pages, work_func_t complete) +struct cifs_writedata *cifs_writedata_alloc(work_func_t complete) { struct cifs_writedata *wdata; wdata = kzalloc(sizeof(*wdata), GFP_NOFS); if (wdata != NULL) { - wdata->pages = pages; kref_init(&wdata->refcount); INIT_LIST_HEAD(&wdata->list); init_completion(&wdata->done); @@ -2558,7 +2548,6 @@ cifs_writedata_direct_alloc(struct page **pages, work_func_t complete) return wdata; } - static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) { struct address_space *mapping = page->mapping; @@ -2617,6 +2606,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) return rc; } +#if 0 // TODO: Remove for iov_iter support static struct cifs_writedata * wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping, pgoff_t end, pgoff_t *index, @@ -2922,6 +2912,375 @@ retry: set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); return rc; } +#endif + +/* + * Extend the region to be written back to include subsequent contiguously + * dirty pages if possible, but don't sleep while doing so. + */ +static void cifs_extend_writeback(struct address_space *mapping, + long *_count, + loff_t start, + int max_pages, + size_t max_len, + unsigned int *_len) +{ + struct folio_batch batch; + struct folio *folio; + unsigned int psize, nr_pages; + size_t len = *_len; + pgoff_t index = (start + len) / PAGE_SIZE; + bool stop = true; + unsigned int i; + XA_STATE(xas, &mapping->i_pages, index); + + folio_batch_init(&batch); + + do { + /* Firstly, we gather up a batch of contiguous dirty pages + * under the RCU read lock - but we can't clear the dirty flags + * there if any of those pages are mapped. + */ + rcu_read_lock(); + + xas_for_each(&xas, folio, ULONG_MAX) { + stop = true; + if (xas_retry(&xas, folio)) + continue; + if (xa_is_value(folio)) + break; + if (folio_index(folio) != index) + break; + if (!folio_try_get_rcu(folio)) { + xas_reset(&xas); + continue; + } + nr_pages = folio_nr_pages(folio); + if (nr_pages > max_pages) + break; + + /* Has the page moved or been split? */ + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); + break; + } + + if (!folio_trylock(folio)) { + folio_put(folio); + break; + } + if (!folio_test_dirty(folio) || folio_test_writeback(folio)) { + folio_unlock(folio); + folio_put(folio); + break; + } + + max_pages -= nr_pages; + psize = folio_size(folio); + len += psize; + stop = false; + if (max_pages <= 0 || len >= max_len || *_count <= 0) + stop = true; + + index += nr_pages; + if (!folio_batch_add(&batch, folio)) + break; + if (stop) + break; + } + + if (!stop) + xas_pause(&xas); + rcu_read_unlock(); + + /* Now, if we obtained any pages, we can shift them to being + * writable and mark them for caching. + */ + if (!folio_batch_count(&batch)) + break; + + for (i = 0; i < folio_batch_count(&batch); i++) { + folio = batch.folios[i]; + /* The folio should be locked, dirty and not undergoing + * writeback from the loop above. + */ + if (!folio_clear_dirty_for_io(folio)) + WARN_ON(1); + if (folio_start_writeback(folio)) + WARN_ON(1); + + *_count -= folio_nr_pages(folio); + folio_unlock(folio); + } + + folio_batch_release(&batch); + cond_resched(); + } while (!stop); + + *_len = len; +} + +/* + * Write back the locked page and any subsequent non-locked dirty pages. + */ +static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, + struct writeback_control *wbc, + struct folio *folio, + loff_t start, loff_t end) +{ + struct inode *inode = mapping->host; + struct TCP_Server_Info *server; + struct cifs_writedata *wdata; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + struct cifsFileInfo *cfile = NULL; + unsigned int xid, wsize, len; + loff_t i_size = i_size_read(inode); + size_t max_len; + long count = wbc->nr_to_write; + int rc; + + /* The folio should be locked, dirty and not undergoing writeback. */ + if (folio_start_writeback(folio)) + WARN_ON(1); + + count -= folio_nr_pages(folio); + len = folio_size(folio); + + xid = get_xid(); + server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); + + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + if (rc) { + cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); + goto err_xid; + } + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, + &wsize, credits); + if (rc != 0) + goto err_close; + + wdata = cifs_writedata_alloc(cifs_writev_complete); + if (!wdata) { + rc = -ENOMEM; + goto err_uncredit; + } + + wdata->sync_mode = wbc->sync_mode; + wdata->offset = folio_pos(folio); + wdata->pid = cfile->pid; + wdata->credits = credits_on_stack; + wdata->cfile = cfile; + wdata->server = server; + cfile = NULL; + + /* Find all consecutive lockable dirty pages, stopping when we find a + * page that is not immediately lockable, is not dirty or is missing, + * or we reach the end of the range. + */ + if (start < i_size) { + /* Trim the write to the EOF; the extra data is ignored. Also + * put an upper limit on the size of a single storedata op. + */ + max_len = wsize; + max_len = min_t(unsigned long long, max_len, end - start + 1); + max_len = min_t(unsigned long long, max_len, i_size - start); + + if (len < max_len) { + int max_pages = INT_MAX; + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_pages = server->smbd_conn->max_frmr_depth; +#endif + max_pages -= folio_nr_pages(folio); + + if (max_pages > 0) + cifs_extend_writeback(mapping, &count, start, + max_pages, max_len, &len); + } + len = min_t(loff_t, len, max_len); + } + + wdata->bytes = len; + + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + folio_unlock(folio); + + if (start < i_size) { + iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, + start, len); + + rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); + if (rc) + goto err_wdata; + + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = wdata->server->ops->async_writev(wdata, + cifs_writedata_release); + if (rc >= 0) { + kref_put(&wdata->refcount, cifs_writedata_release); + goto err_close; + } + } else { + /* The dirty region was entirely beyond the EOF. */ + cifs_pages_written_back(inode, start, len); + rc = 0; + } + +err_wdata: + kref_put(&wdata->refcount, cifs_writedata_release); +err_uncredit: + add_credits_and_wake_if(server, credits, 0); +err_close: + if (cfile) + cifsFileInfo_put(cfile); +err_xid: + free_xid(xid); + if (rc == 0) { + wbc->nr_to_write = count; + } else if (is_retryable_error(rc)) { + cifs_pages_write_redirty(inode, start, len); + } else { + cifs_pages_write_failed(inode, start, len); + mapping_set_error(mapping, rc); + } + /* Indication to update ctime and mtime as close is deferred */ + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); + return rc; +} + +/* + * write a region of pages back to the server + */ +static int cifs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + loff_t start, loff_t end, loff_t *_next) +{ + struct folio *folio; + struct page *head_page; + ssize_t ret; + int n, skips = 0; + + do { + pgoff_t index = start / PAGE_SIZE; + + n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE, + PAGECACHE_TAG_DIRTY, 1, &head_page); + if (!n) + break; + + folio = page_folio(head_page); + start = folio_pos(folio); /* May regress with THPs */ + + /* At this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping + */ + if (wbc->sync_mode != WB_SYNC_NONE) { + ret = folio_lock_killable(folio); + if (ret < 0) { + folio_put(folio); + return ret; + } + } else { + if (!folio_trylock(folio)) { + folio_put(folio); + return 0; + } + } + + if (folio_mapping(folio) != mapping || + !folio_test_dirty(folio)) { + start += folio_size(folio); + folio_unlock(folio); + folio_put(folio); + continue; + } + + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + if (wbc->sync_mode != WB_SYNC_NONE) { + folio_wait_writeback(folio); +#ifdef CONFIG_CIFS_FSCACHE + folio_wait_fscache(folio); +#endif + } else { + start += folio_size(folio); + } + folio_put(folio); + if (wbc->sync_mode == WB_SYNC_NONE) { + if (skips >= 5 || need_resched()) + break; + skips++; + } + continue; + } + + if (!folio_clear_dirty_for_io(folio)) + /* We hold the page lock - it should've been dirty. */ + WARN_ON(1); + + ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end); + folio_put(folio); + if (ret < 0) + return ret; + + start += ret; + cond_resched(); + } while (wbc->nr_to_write > 0); + + *_next = start; + return 0; +} + +/* + * Write some of the pending data back to the server + */ +static int cifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + loff_t start, next; + int ret; + + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. + */ + + if (wbc->range_cyclic) { + start = mapping->writeback_index * PAGE_SIZE; + ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = cifs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); + if (wbc->nr_to_write > 0 && ret == 0) + mapping->writeback_index = next / PAGE_SIZE; + } else { + ret = cifs_writepages_region(mapping, wbc, + wbc->range_start, wbc->range_end, &next); + } + + return ret; +} static int cifs_writepage_locked(struct page *page, struct writeback_control *wbc) @@ -2972,6 +3331,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct cifsFileInfo *cfile = file->private_data; struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct folio *folio = page_folio(page); __u32 pid; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) @@ -2982,14 +3342,14 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", page, pos, copied); - if (PageChecked(page)) { + if (folio_test_checked(folio)) { if (copied == len) - SetPageUptodate(page); - ClearPageChecked(page); - } else if (!PageUptodate(page) && copied == PAGE_SIZE) - SetPageUptodate(page); + folio_mark_uptodate(folio); + folio_clear_checked(folio); + } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) + folio_mark_uptodate(folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { char *page_data; unsigned offset = pos & (PAGE_SIZE - 1); unsigned int xid; @@ -3149,6 +3509,7 @@ int cifs_flush(struct file *file, fl_owner_t id) return rc; } +#if 0 // TODO: Remove for iov_iter support static int cifs_write_allocate_pages(struct page **pages, unsigned long num_pages) { @@ -3189,17 +3550,15 @@ size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len) return num_pages; } +#endif static void cifs_uncached_writedata_release(struct kref *refcount) { - int i; struct cifs_writedata *wdata = container_of(refcount, struct cifs_writedata, refcount); kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); - for (i = 0; i < wdata->nr_pages; i++) - put_page(wdata->pages[i]); cifs_writedata_release(refcount); } @@ -3225,6 +3584,7 @@ cifs_uncached_writev_complete(struct work_struct *work) kref_put(&wdata->refcount, cifs_uncached_writedata_release); } +#if 0 // TODO: Remove for iov_iter support static int wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, size_t *len, unsigned long *num_pages) @@ -3266,6 +3626,7 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from, *num_pages = i + 1; return 0; } +#endif static int cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, @@ -3337,23 +3698,57 @@ fail: return rc; } +/* + * Select span of a bvec iterator we're going to use. Limit it by both maximum + * size and maximum number of segments. + */ +static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, + size_t max_segs, unsigned int *_nsegs) +{ + const struct bio_vec *bvecs = iter->bvec; + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset; + + if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) + return 0; + + while (n && ix < nbv && skip) { + len = bvecs[ix].bv_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nbv) { + len = min3(n, bvecs[ix].bv_len - skip, max_size); + span += len; + nsegs++; + ix++; + if (span >= max_size || nsegs >= max_segs) + b |
