diff options
Diffstat (limited to 'lib/iov_iter.c')
-rw-r--r-- | lib/iov_iter.c | 1136 |
1 files changed, 486 insertions, 650 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0b64695ab632..4b7fce72e3e5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -16,6 +16,16 @@ #define PIPE_PARANOIA /* for now */ +/* covers ubuf and kbuf alike */ +#define iterate_buf(i, n, base, len, off, __p, STEP) { \ + size_t __maybe_unused off = 0; \ + len = n; \ + base = __p + i->iov_offset; \ + len -= (STEP); \ + i->iov_offset += len; \ + n = len; \ +} + /* covers iovec and kvec alike */ #define iterate_iovec(i, n, base, len, off, __p, STEP) { \ size_t off = 0; \ @@ -110,7 +120,12 @@ __out: \ if (unlikely(i->count < n)) \ n = i->count; \ if (likely(n)) { \ - if (likely(iter_is_iovec(i))) { \ + if (likely(iter_is_ubuf(i))) { \ + void __user *base; \ + size_t len; \ + iterate_buf(i, n, base, len, off, \ + i->ubuf, (I)) \ + } else if (likely(iter_is_iovec(i))) { \ const struct iovec *iov = i->iov; \ void __user *base; \ size_t len; \ @@ -168,172 +183,10 @@ static int copyin(void *to, const void __user *from, size_t n) return n; } -static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, + unsigned int slot) { - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *from; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) { - kaddr = kmap_atomic(page); - from = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = from - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - from = kaddr + offset; - left = copyout(buf, from, copy); - copy -= left; - skip += copy; - from += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyout(buf, from, copy); - copy -= left; - skip = copy; - from += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; -} - -static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - size_t skip, copy, left, wanted; - const struct iovec *iov; - char __user *buf; - void *kaddr, *to; - - if (unlikely(bytes > i->count)) - bytes = i->count; - - if (unlikely(!bytes)) - return 0; - - might_fault(); - wanted = bytes; - iov = i->iov; - skip = i->iov_offset; - buf = iov->iov_base + skip; - copy = min(bytes, iov->iov_len - skip); - - if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) { - kaddr = kmap_atomic(page); - to = kaddr + offset; - - /* first chunk, usually the only one */ - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - if (likely(!bytes)) { - kunmap_atomic(kaddr); - goto done; - } - offset = to - kaddr; - buf += copy; - kunmap_atomic(kaddr); - copy = min(bytes, iov->iov_len - skip); - } - /* Too bad - revert to non-atomic kmap */ - - kaddr = kmap(page); - to = kaddr + offset; - left = copyin(to, buf, copy); - copy -= left; - skip += copy; - to += copy; - bytes -= copy; - while (unlikely(!left && bytes)) { - iov++; - buf = iov->iov_base; - copy = min(bytes, iov->iov_len); - left = copyin(to, buf, copy); - copy -= left; - skip = copy; - to += copy; - bytes -= copy; - } - kunmap(page); - -done: - if (skip == iov->iov_len) { - iov++; - skip = 0; - } - i->count -= wanted - bytes; - i->nr_segs -= iov - i->iov; - i->iov = iov; - i->iov_offset = skip; - return wanted - bytes; + return &pipe->bufs[slot & (pipe->ring_size - 1)]; } #ifdef PIPE_PARANOIA @@ -342,20 +195,19 @@ static bool sanity(const struct iov_iter *i) struct pipe_inode_info *pipe = i->pipe; unsigned int p_head = pipe->head; unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); unsigned int i_head = i->head; unsigned int idx; - if (i->iov_offset) { + if (i->last_offset) { struct pipe_buffer *p; if (unlikely(p_occupancy == 0)) goto Bad; // pipe must be non-empty if (unlikely(i_head != p_head - 1)) goto Bad; // must be at the last buffer... - p = &pipe->bufs[i_head & p_mask]; - if (unlikely(p->offset + p->len != i->iov_offset)) + p = pipe_buf(pipe, i_head); + if (unlikely(p->offset + p->len != abs(i->last_offset))) goto Bad; // ... at the end of segment } else { if (i_head != p_head) @@ -363,7 +215,7 @@ static bool sanity(const struct iov_iter *i) } return true; Bad: - printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); + printk(KERN_ERR "idx = %d, offset = %d\n", i_head, i->last_offset); printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", p_head, p_tail, pipe->ring_size); for (idx = 0; idx < pipe->ring_size; idx++) @@ -379,15 +231,79 @@ Bad: #define sanity(i) true #endif +static struct page *push_anon(struct pipe_inode_info *pipe, unsigned size) +{ + struct page *page = alloc_page(GFP_USER); + if (page) { + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &default_pipe_buf_ops, + .page = page, + .offset = 0, + .len = size + }; + } + return page; +} + +static void push_page(struct pipe_inode_info *pipe, struct page *page, + unsigned int offset, unsigned int size) +{ + struct pipe_buffer *buf = pipe_buf(pipe, pipe->head++); + *buf = (struct pipe_buffer) { + .ops = &page_cache_pipe_buf_ops, + .page = page, + .offset = offset, + .len = size + }; + get_page(page); +} + +static inline int last_offset(const struct pipe_buffer *buf) +{ + if (buf->ops == &default_pipe_buf_ops) + return buf->len; // buf->offset is 0 for those + else + return -(buf->offset + buf->len); +} + +static struct page *append_pipe(struct iov_iter *i, size_t size, + unsigned int *off) +{ + struct pipe_inode_info *pipe = i->pipe; + int offset = i->last_offset; + struct pipe_buffer *buf; + struct page *page; + + if (offset > 0 && offset < PAGE_SIZE) { + // some space in the last buffer; add to it + buf = pipe_buf(pipe, pipe->head - 1); + size = min_t(size_t, size, PAGE_SIZE - offset); + buf->len += size; + i->last_offset += size; + i->count -= size; + *off = offset; + return buf->page; + } + // OK, we need a new buffer + *off = 0; + size = min_t(size_t, size, PAGE_SIZE); + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) + return NULL; + page = push_anon(pipe, size); + if (!page) + return NULL; + i->head = pipe->head - 1; + i->last_offset = size; + i->count -= size; + return page; +} + static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off; + unsigned int head = pipe->head; if (unlikely(bytes > i->count)) bytes = i->count; @@ -398,32 +314,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by if (!sanity(i)) return 0; - off = i->iov_offset; - buf = &pipe->bufs[i_head & p_mask]; - if (off) { - if (offset == off && buf->page == page) { - /* merge with the last one */ + if (offset && i->last_offset == -offset) { // could we merge it? + struct pipe_buffer *buf = pipe_buf(pipe, head - 1); + if (buf->page == page) { buf->len += bytes; - i->iov_offset += bytes; - goto out; + i->last_offset -= bytes; + i->count -= bytes; + return bytes; } - i_head++; - buf = &pipe->bufs[i_head & p_mask]; } - if (pipe_full(i_head, p_tail, pipe->max_usage)) + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; - buf->ops = &page_cache_pipe_buf_ops; - buf->flags = 0; - get_page(page); - buf->page = page; - buf->offset = offset; - buf->len = bytes; - - pipe->head = i_head + 1; - i->iov_offset = offset + bytes; - i->head = i_head; -out: + push_page(pipe, page, offset, bytes); + i->last_offset = -(offset + bytes); + i->head = head; i->count -= bytes; return bytes; } @@ -443,7 +348,11 @@ out: */ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_readable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -482,7 +391,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable); */ size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) { - if (iter_is_iovec(i)) { + if (iter_is_ubuf(i)) { + size_t n = min(size, iov_iter_count(i)); + n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n); + return size - n; + } else if (iter_is_iovec(i)) { size_t count = min(size, iov_iter_count(i)); const struct iovec *p; size_t skip; @@ -513,6 +426,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, *i = (struct iov_iter) { .iter_type = ITER_IOVEC, .nofault = false, + .user_backed = true, .data_source = direction, .iov = iov, .nr_segs = nr_segs, @@ -522,101 +436,43 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, } EXPORT_SYMBOL(iov_iter_init); -static inline bool allocated(struct pipe_buffer *buf) -{ - return buf->ops == &default_pipe_buf_ops; -} - -static inline void data_start(const struct iov_iter *i, - unsigned int *iter_headp, size_t *offp) -{ - unsigned int p_mask = i->pipe->ring_size - 1; - unsigned int iter_head = i->head; - size_t off = i->iov_offset; - - if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || - off == PAGE_SIZE)) { - iter_head++; - off = 0; - } - *iter_headp = iter_head; - *offp = off; -} - -static size_t push_pipe(struct iov_iter *i, size_t size, - int *iter_headp, size_t *offp) +// returns the offset in partial buffer (if any) +static inline unsigned int pipe_npages(const struct iov_iter *i, int *npages) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int iter_head; - size_t off; - ssize_t left; + int used = pipe->head - pipe->tail; + int off = i->last_offset; - if (unlikely(size > i->count)) - size = i->count; - if (unlikely(!size)) - return 0; + *npages = max((int)pipe->max_usage - used, 0); - left = size; - data_start(i, &iter_head, &off); - *iter_headp = iter_head; - *offp = off; - if (off) { - left -= PAGE_SIZE - off; - if (left <= 0) { - pipe->bufs[iter_head & p_mask].len += size; - return size; - } - pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; - iter_head++; - } - while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { - struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; - struct page *page = alloc_page(GFP_USER); - if (!page) - break; - - buf->ops = &default_pipe_buf_ops; - buf->flags = 0; - buf->page = page; - buf->offset = 0; - buf->len = min_t(ssize_t, left, PAGE_SIZE); - left -= buf->len; - iter_head++; - pipe->head = iter_head; - - if (left == 0) - return size; + if (off > 0 && off < PAGE_SIZE) { // anon and not full + (*npages)++; + return off; } - return size - left; + return 0; } static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int off, chunk; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; + + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + chunk = min_t(size_t, n, PAGE_SIZE - off); + if (!page) + return bytes - n; + memcpy_to_page(page, off, addr, chunk); addr += chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } @@ -630,31 +486,32 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len, static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, struct iov_iter *i, __wsum *sump) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; __wsum sum = *sump; size_t off = 0; - unsigned int i_head; - size_t r; + unsigned int chunk, r; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - bytes = push_pipe(i, bytes, &i_head, &r); while (bytes) { - size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + struct page *page = append_pipe(i, bytes, &r); + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - r); + p = kmap_local_page(page); sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off); kunmap_local(p); - i->head = i_head; - i->iov_offset = r + chunk; - bytes -= chunk; off += chunk; - r = 0; - i_head++; + bytes -= chunk; } *sump = sum; - i->count -= off; return off; } @@ -662,7 +519,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyout(base, addr + off, len), @@ -686,32 +543,36 @@ static int copyout_mc(void __user *to, const void *from, size_t n) static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off, xfer = 0; + size_t xfer = 0; + unsigned int off, chunk; + + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) + return 0; if (!sanity(i)) return 0; - n = push_pipe(i, bytes, &i_head, &off); - while (n) { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + while (bytes) { + struct page *page = append_pipe(i, bytes, &off); unsigned long rem; + char *p; + + if (!page) + break; + chunk = min_t(size_t, bytes, PAGE_SIZE - off); + p = kmap_local_page(page); rem = copy_mc_to_kernel(p + off, addr + xfer, chunk); chunk -= rem; kunmap_local(p); - i->head = i_head; - i->iov_offset = off + chunk; xfer += chunk; - if (rem) + bytes -= chunk; + if (rem) { + iov_iter_revert(i, rem); break; - n -= chunk; - off = 0; - i_head++; + } } - i->count -= xfer; return xfer; } @@ -744,7 +605,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { if (unlikely(iov_iter_is_pipe(i))) return copy_mc_pipe_to_iter(addr, bytes, i); - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); __iterate_and_advance(i, bytes, base, len, off, copyout_mc(base, addr + off, len), @@ -762,7 +623,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) WARN_ON(1); return 0; } - if (iter_is_iovec(i)) + if (user_backed_iter(i)) might_fault(); iterate_and_advance(i, bytes, base, len, off, copyin(addr + off, base, len), @@ -845,40 +706,21 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) return false; } -static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes, - struct iov_iter *i) -{ - if (likely(iter_is_iovec(i))) - return copy_page_to_iter_iovec(page, offset, bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { - void *kaddr = kmap_local_page(page); - size_t wanted = _copy_to_iter(kaddr + offset, bytes, i); - kunmap_local(kaddr); - return wanted; - } - if (iov_iter_is_pipe(i)) - return copy_page_to_iter_pipe(page, offset, bytes, i); - if (unlikely(iov_iter_is_discard(i))) { - if (unlikely(i->count < bytes)) - bytes = i->count; - i->count -= bytes; - return bytes; - } - WARN_ON(1); - return 0; -} - size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t res = 0; if (unlikely(!page_copy_sane(page, offset, bytes))) return 0; + if (unlikely(iov_iter_is_pipe(i))) + return copy_page_to_iter_pipe(page, offset, bytes, i); page += offset / PAGE_SIZE; // first subpage offset %= PAGE_SIZE; while (1) { - size_t n = __copy_page_to_iter(page, offset, - min(bytes, (size_t)PAGE_SIZE - offset), i); + void *kaddr = kmap_local_page(page); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_to_iter(kaddr + offset, n, i); + kunmap_local(kaddr); res += n; bytes -= n; if (!bytes || !n) @@ -896,47 +738,53 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { - if (unlikely(!page_copy_sane(page, offset, bytes))) + size_t res = 0; + if (!page_copy_sane(page, offset, bytes)) return 0; - if (likely(iter_is_iovec(i))) - return copy_page_from_iter_iovec(page, offset, bytes, i); - if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) { + page += offset / PAGE_SIZE; // first subpage + offset %= PAGE_SIZE; + while (1) { void *kaddr = kmap_local_page(page); - size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); + size_t n = min(bytes, (size_t)PAGE_SIZE - offset); + n = _copy_from_iter(kaddr + offset, n, i); kunmap_local(kaddr); - return wanted; + res += n; + bytes -= n; + if (!bytes || !n) + break; + offset += n; + if (offset == PAGE_SIZE) { + page++; + offset = 0; + } } - WARN_ON(1); - return 0; + return res; } EXPORT_SYMBOL(copy_page_from_iter); static size_t pipe_zero(size_t bytes, struct iov_iter *i) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head; - size_t n, off; + unsigned int chunk, off; - if (!sanity(i)) + if (unlikely(bytes > i->count)) + bytes = i->count; + if (unlikely(!bytes)) return 0; - bytes = n = push_pipe(i, bytes, &i_head, &off); - if (unlikely(!n)) + if (!sanity(i)) return 0; - do { - size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page); + for (size_t n = bytes; n; n -= chunk) { + struct page *page = append_pipe(i, n, &off); + char *p; + + if (!page) + return bytes - n; + chunk = min_t(size_t, n, PAGE_SIZE - off); + p = kmap_local_page(page); memset(p + off, 0, chunk); kunmap_local(p); - i->head = i_head; - i->iov_offset = off + chunk; - n -= chunk; - off = 0; - i_head++; - } while (n); - i->count -= bytes; + } return bytes; } @@ -975,71 +823,50 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt } EXPORT_SYMBOL(copy_page_from_iter_atomic); -static inline void pipe_truncate(struct iov_iter *i) -{ - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_tail = pipe->tail; - unsigned int p_head = pipe->head; - unsigned int p_mask = pipe->ring_size - 1; - - if (!pipe_empty(p_head, p_tail)) { - struct pipe_buffer *buf; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - - if (off) { - buf = &pipe->bufs[i_head & p_mask]; - buf->len = off - buf->offset; - i_head++; - } - while (p_head != i_head) { - p_head--; - pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); - } - - pipe->head = p_head; - } -} - static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - if (size) { - struct pipe_buffer *buf; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset, left = size; + int off = i->last_offset; + if (!off && !size) { + pipe_discard_from(pipe, i->start_head); // discard everything + return; + } + i->count -= size; + while (1) { + struct pipe_buffer *buf = pipe_buf(pipe, i->head); if (off) /* make it relative to the beginning of buffer */ - left += off - pipe->bufs[i_head & p_mask].offset; - while (1) { - buf = &pipe->bufs[i_head & p_mask]; - if (left <= buf->len) - break; - left -= buf->len; - i_head++; + size += abs(off) - buf->offset; + if (size <= buf->len) { + buf->len = size; + i->last_offset = last_offset(buf); + break; } - i->head = i_head; - i->iov_offset = buf->offset + left; + size -= buf->len; + i->head++; + off = 0; } - i->count -= size; - /* ... and discard everything past that point */ - pipe_truncate(i); + pipe_discard_from(pipe, i->head + 1); // discard everything past this one } static void iov_iter_bvec_advance(struct iov_iter *i, size_t size) { - struct bvec_iter bi; + const struct bio_vec *bvec, *end; + + if (!i->count) + return; + i->count -= size; - bi.bi_size = i->count; - bi.bi_bvec_done = i->iov_offset; - bi.bi_idx = 0; - bvec_iter_advance(i->bvec, &bi, size); + size += i->iov_offset; - i->bvec += bi.bi_idx; - i->nr_segs -= bi.bi_idx; - i->count = bi.bi_size; - i->iov_offset = bi.bi_bvec_done; + for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) { + if (likely(size < bvec->bv_len)) + break; + size -= bvec->bv_len; + } + i->iov_offset = size; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; } static void iov_iter_iovec_advance(struct iov_iter *i, size_t size) @@ -1065,16 +892,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size) { if (unlikely(i->count < size)) size = i->count; - if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { + if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) { + i->iov_offset += size; + i->count -= size; + } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) { /* iovec and kvec have identical layouts */ iov_iter_iovec_advance(i, size); } else if (iov_iter_is_bvec(i)) { iov_iter_bvec_advance(i, size); } else if (iov_iter_is_pipe(i)) { pipe_advance(i, size); - } else if (unlikely(iov_iter_is_xarray(i))) { - i->iov_offset += size; - i->count -= size; } else if (iov_iter_is_discard(i)) { i->count -= size; } @@ -1090,28 +917,22 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) i->count += unroll; if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - unsigned int i_head = i->head; - size_t off = i->iov_offset; - while (1) { - struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; - size_t n = off - b->offset; - if (unroll < n) { - off -= unroll; - break; - } - unroll -= n; - if (!unroll && i_head == i->start_head) { - off = 0; - break; + unsigned int head = pipe->head; + + while (head > i->start_head) { + struct pipe_buffer *b = pipe_buf(pipe, --head); + if (unroll < b->len) { + b->len -= unroll; + i->last_offset = last_offset(b); + i->head = head; + return; } - i_head--; - b = &pipe->bufs[i_head & p_mask]; - off = b->offset + b->len; + unroll -= b->len; + pipe_buf_release(pipe, b); + pipe->head--; } - i->iov_offset = off; - i->head = i_head; - pipe_truncate(i); + i->last_offset = 0; + i->head = head; return; } if (unlikely(iov_iter_is_discard(i))) @@ -1121,7 +942,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) return; } unroll -= i->iov_offset; - if (iov_iter_is_xarray(i)) { + if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) { BUG(); /* We should never go beyond the start of the specified * range since we might then be straying into pages that * aren't pinned. @@ -1213,7 +1034,7 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, .pipe = pipe, .head = pipe->head, .start_head = pipe->head, - .iov_offset = 0, + .last_offset = 0, .count = count }; } @@ -1268,6 +1089,105 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) } EXPORT_SYMBOL(iov_iter_discard); +static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask) +{ + size_t size = i->count; + size_t skip = i->iov_offset; + unsigned k; + + for (k = 0; k < i->nr_segs; k++, skip = 0) { + size_t len = i->iov[k].iov_len - skip; + + if (len > size) + len = size; + if (len & len_mask) + return false; + if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask) + return false; + + size -= len; + if (!size) + break; + } + return true; +} + +static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask) +{ + size_t size = i->count; + unsigned skip = i->iov_offset; + unsigned k; + + for (k = 0; k < i->nr_segs; k++, skip = 0) { + size_t len = i->bvec[k].bv_len - skip; + + if (len > size) + len = size; + if (len & len_mask) + return false; + if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask) + return false; + + size -= len; + if (!size) + break; + } + return true; +} + +/** + * iov_iter_is_aligned() - Check if the addresses and lengths of each segments + * are aligned to the parameters. + * + * @i: &struct iov_iter to restore + * @addr_mask: bit mask to check against the iov element's addresses + * @len_mask: bit mask to check against the iov element's lengths + * + * Return: false if any addresses or lengths intersect with the provided masks + */ +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask) +{ + if (likely(iter_is_ubuf(i))) { + if (i->count & len_mask) + return false; + if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) + return false; + return true; + } + + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) + return iov_iter_aligned_iovec(i, addr_mask, len_mask); + + if (iov_iter_is_bvec(i)) + return iov_iter_aligned_bvec(i, addr_mask, len_mask); + + if (iov_iter_is_pipe(i)) { + size_t size = i->count; + + if (size & len_mask) + return false; + if (size && i->last_offset > 0) { + if (i->last_offset & addr_mask) + return false; + } + + return true; + } + + if (iov_iter_is_xarray(i)) { + if (i->count & len_mask) + return false; + if ((i->xarray_start + i->iov_offset) & addr_mask) + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(iov_iter_is_aligned); + static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { unsigned long res = 0; @@ -1312,6 +1232,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i) unsigned long iov_iter_alignment(const struct iov_iter *i) { + if (likely(iter_is_ubuf(i))) { + size_t size = i->count; + if (size) + return ((unsigned long)i->ubuf + i->iov_offset) | size; + return 0; + } + /* iovec and kvec have identical layouts */ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) return iov_iter_alignment_iovec(i); @@ -1320,11 +1247,10 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) return iov_iter_alignment_bvec(i); if (iov_iter_is_pipe(i)) { - unsigned int p_mask = i->pipe->ring_size - 1; size_t size = i->count; - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) - return size | i->iov_offset; + if (size && i->last_offset > 0) + return size | i->last_offset; return size; } @@ -1342,6 +1268,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) size_t size = i->count; unsigned k; + if (iter_is_ubuf(i)) + return 0; + if (WARN_ON(!iter_is_iovec(i))) return ~0U; @@ -1360,45 +1289,50 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i) } EXPORT_SYMBOL(iov_iter_gap_alignment); -static inline ssize_t __pipe_get_pages(struct iov_iter *i, - size_t maxsize, - struct page **pages, - int iter_head, - size_t *start) +static int want_pages_array(struct page ***res, size_t size, + size_t start, unsigned int maxpages) { - struct pipe_inode_info *pipe = i->pipe; - unsigned int p_mask = pipe->ring_size - 1; - ssize_t n = push_pipe(i, maxsize, &iter_head, start); - if (!n) - return -EFAULT; + unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE); - maxsize = n; - n += *start; - while (n > 0) { - get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); - iter_head++; - n -= PAGE_SIZE; + if (count > maxpages) + count = maxpages; + WARN_ON(!count); // caller should've prevented that + if (!*res) { + *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL); + if (!*res) + return 0; } - - return maxsize; + return count; } static ssize_t pipe_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, unsigned maxpages, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *start) { - unsigned int iter_head, npages; - size_t capacity; + unsigned int npages, count, off, chunk; + struct page **p; + size_t left; if (!sanity(i)) return -EFAULT; - data_start(i, &iter_head, start); - /* Amount of free space: some of this one + all after this one */ - npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); - capacity = min(npages, maxpages) * PAGE_SIZE - *start; - - return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); + *start = off = pipe_npages(i, &npages); + if (!npages) + return -EFAULT; + count = want_pages_array(pages, maxsize, off, min(npages, maxpages)); + if (!count) + return -ENOMEM; + p = *pages; + for (npages = 0, left = maxsize ; npages < count; npages++, left -= chunk) { + struct page *page = append_pipe(i, left, &off); + if (!page) + break; + chunk = min_t(size_t, left, PAGE_SIZE - off); + get_page(*p++ = page); + } + if (!npages) + return -EFAULT; + return maxsize - left; } static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, @@ -1429,122 +1363,124 @@ static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa } static ssize_t iter_xarray_get_pages(struct iov_iter *i, - struct page **pages, size_t maxsize, + struct page ***pages, size_t maxsize, unsigned maxpages, size_t *_start_offset) { - unsigned nr, offset; - pgoff_t index, count; - size_t size = maxsize; + unsigned nr, offset, count; + pgoff_t index; loff_t pos; |