diff options
| -rw-r--r-- | arch/powerpc/kernel/kvm.c | 3 | ||||
| -rw-r--r-- | arch/powerpc/kernel/signal_32.c | 4 | ||||
| -rw-r--r-- | arch/powerpc/kernel/signal_64.c | 2 | ||||
| -rw-r--r-- | arch/x86/kernel/fpu/signal.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/armada/armada_gem.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/file.c | 7 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 5 | ||||
| -rw-r--r-- | fs/erofs/data.c | 2 | ||||
| -rw-r--r-- | fs/ext4/file.c | 5 | ||||
| -rw-r--r-- | fs/f2fs/file.c | 2 | ||||
| -rw-r--r-- | fs/fuse/file.c | 2 | ||||
| -rw-r--r-- | fs/gfs2/bmap.c | 60 | ||||
| -rw-r--r-- | fs/gfs2/file.c | 252 | ||||
| -rw-r--r-- | fs/gfs2/glock.c | 330 | ||||
| -rw-r--r-- | fs/gfs2/glock.h | 20 | ||||
| -rw-r--r-- | fs/gfs2/incore.h | 4 | ||||
| -rw-r--r-- | fs/iomap/buffered-io.c | 2 | ||||
| -rw-r--r-- | fs/iomap/direct-io.c | 29 | ||||
| -rw-r--r-- | fs/ntfs/file.c | 2 | ||||
| -rw-r--r-- | fs/ntfs3/file.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_file.c | 6 | ||||
| -rw-r--r-- | fs/zonefs/super.c | 4 | ||||
| -rw-r--r-- | include/linux/iomap.h | 11 | ||||
| -rw-r--r-- | include/linux/mm.h | 3 | ||||
| -rw-r--r-- | include/linux/pagemap.h | 58 | ||||
| -rw-r--r-- | include/linux/uio.h | 4 | ||||
| -rw-r--r-- | lib/iov_iter.c | 103 | ||||
| -rw-r--r-- | mm/filemap.c | 4 | ||||
| -rw-r--r-- | mm/gup.c | 139 |
29 files changed, 791 insertions, 283 deletions
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 617eba82531c..6568823cf306 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void) on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ - if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) { + if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE, + sizeof(u32))) { kvm_patching_worked = false; return; } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 0608581967f0..38c3eae40c14 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (new_ctx == NULL) return 0; if (!access_ok(new_ctx, ctx_size) || - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) + fault_in_readable((char __user *)new_ctx, ctx_size)) return -EFAULT; /* @@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, #endif if (!access_ok(ctx, sizeof(*ctx)) || - fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx))) + fault_in_readable((char __user *)ctx, sizeof(*ctx))) return -EFAULT; /* diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 1831bba0582e..9f471b4a11e3 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, if (new_ctx == NULL) return 0; if (!access_ok(new_ctx, ctx_size) || - fault_in_pages_readable((u8 __user *)new_ctx, ctx_size)) + fault_in_readable((char __user *)new_ctx, ctx_size)) return -EFAULT; /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index cc977da6e128..d5958278eba6 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -309,7 +309,7 @@ retry: if (ret != X86_TRAP_PF) return false; - if (!fault_in_pages_readable(buf, size)) + if (!fault_in_readable(buf, size)) goto retry; return false; } diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c index 21909642ee4c..8fbb25913327 100644 --- a/drivers/gpu/drm/armada/armada_gem.c +++ b/drivers/gpu/drm/armada/armada_gem.c @@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_armada_gem_pwrite *args = data; struct armada_gem_object *dobj; char __user *ptr; - int ret; + int ret = 0; DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n", args->handle, args->offset, args->size, args->ptr); @@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (!access_ok(ptr, args->size)) return -EFAULT; - ret = fault_in_pages_readable(ptr, args->size); - if (ret) - return ret; + if (fault_in_readable(ptr, args->size)) + return -EFAULT; dobj = armada_gem_object_lookup(file, args->handle); if (dobj == NULL) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9a3db1365ae8..581662d16b72 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1718,7 +1718,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, * Fault pages before locking them in prepare_pages * to avoid recursive lock */ - if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) { + if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) { ret = -EFAULT; break; } @@ -1965,7 +1965,7 @@ relock: } dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops, - 0); + 0, 0); btrfs_inode_unlock(inode, ilock_flags); @@ -3668,7 +3668,8 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to) return 0; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0); + ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, + 0, 0); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 02ff085c31bf..fb8cc9642ac4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2222,9 +2222,8 @@ static noinline int search_ioctl(struct inode *inode, key.offset = sk->min_offset; while (1) { - ret = fault_in_pages_writeable(ubuf + sk_offset, - *buf_size - sk_offset); - if (ret) + ret = -EFAULT; + if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset)) break; ret = btrfs_search_forward(root, &key, path, sk->min_transid); diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 808234d9190c..0e35ef3f9f3d 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -334,7 +334,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (!err) return iomap_dio_rw(iocb, to, &erofs_iomap_ops, - NULL, 0); + NULL, 0, 0); if (err < 0) return err; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9c5559faacda..4c5f41052351 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -74,7 +74,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) return generic_file_read_iter(iocb, to); } - ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0); + ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0); inode_unlock_shared(inode); file_accessed(iocb->ki_filp); @@ -566,7 +566,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ilock_shared) iomap_ops = &ext4_iomap_overwrite_ops; ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, - (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0); + (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, + 0); if (ret == -ENOTBLK) ret = 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c8ef33bd8d3..eb971e1e7227 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4276,7 +4276,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t target_size = 0; int err; - if (iov_iter_fault_in_readable(from, iov_iter_count(from))) + if (fault_in_iov_iter_readable(from, iov_iter_count(from))) set_inode_flag(inode, FI_NO_PREALLOC); if ((iocb->ki_flags & IOCB_NOWAIT)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e6039f22311b..34b6d0650e66 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1164,7 +1164,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, again: err = -EFAULT; - if (iov_iter_fault_in_readable(ii, bytes)) + if (fault_in_iov_iter_readable(ii, bytes)) break; err = -ENOMEM; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5414c2c33580..7235d539e969 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -961,46 +961,6 @@ hole_found: goto out; } -static int gfs2_write_lock(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - int error; - - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); - if (error) - goto out_uninit; - if (&ip->i_inode == sdp->sd_rindex) { - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - - error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, - GL_NOCACHE, &m_ip->i_gh); - if (error) - goto out_unlock; - } - return 0; - -out_unlock: - gfs2_glock_dq(&ip->i_gh); -out_uninit: - gfs2_holder_uninit(&ip->i_gh); - return error; -} - -static void gfs2_write_unlock(struct inode *inode) -{ - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - - if (&ip->i_inode == sdp->sd_rindex) { - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - - gfs2_glock_dq_uninit(&m_ip->i_gh); - } - gfs2_glock_dq_uninit(&ip->i_gh); -} - static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len) { @@ -1118,11 +1078,6 @@ out_qunlock: return ret; } -static inline bool gfs2_iomap_need_write_lock(unsigned flags) -{ - return (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT); -} - static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct iomap *srcmap) @@ -1135,12 +1090,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, iomap->flags |= IOMAP_F_BUFFER_HEAD; trace_gfs2_iomap_start(ip, pos, length, flags); - if (gfs2_iomap_need_write_lock(flags)) { - ret = gfs2_write_lock(inode); - if (ret) - goto out; - } - ret = __gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); if (ret) goto out_unlock; @@ -1168,10 +1117,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); out_unlock: - if (ret && gfs2_iomap_need_write_lock(flags)) - gfs2_write_unlock(inode); release_metapath(&mp); -out: trace_gfs2_iomap_end(ip, iomap, ret); return ret; } @@ -1219,15 +1165,11 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, } if (unlikely(!written)) - goto out_unlock; + return 0; if (iomap->flags & IOMAP_F_SIZE_CHANGED) mark_inode_dirty(inode); set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); - -out_unlock: - if (gfs2_iomap_need_write_lock(flags)) - gfs2_write_unlock(inode); return 0; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 5436a688157a..8f4627a19359 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -776,27 +776,99 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret ? ret : ret1; } +static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i, + size_t *prev_count, + size_t *window_size) +{ + char __user *p = i->iov[0].iov_base + i->iov_offset; + size_t count = iov_iter_count(i); + int pages = 1; + + if (likely(!count)) + return false; + if (ret <= 0 && ret != -EFAULT) + return false; + if (!iter_is_iovec(i)) + return false; + + if (*prev_count != count || !*window_size) { + int pages, nr_dirtied; + + pages = min_t(int, BIO_MAX_VECS, + DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE)); + nr_dirtied = max(current->nr_dirtied_pause - + current->nr_dirtied, 1); + pages = min(pages, nr_dirtied); + } + + *prev_count = count; + *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p); + return true; +} + static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to, struct gfs2_holder *gh) { struct file *file = iocb->ki_filp; struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - size_t count = iov_iter_count(to); + size_t prev_count = 0, window_size = 0; + size_t written = 0; ssize_t ret; - if (!count) + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + * + * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger + * physical as well as manual page faults, and we need to disable both + * kinds. + * + * For direct I/O, gfs2 takes the inode glock in deferred mode. This + * locking mode is compatible with other deferred holders, so multiple + * processes and nodes can do direct I/O to a file at the same time. + * There's no guarantee that reads or writes will be atomic. Any + * coordination among readers and writers needs to happen externally. + */ + + if (!iov_iter_count(to)) return 0; /* skip atime */ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; +retry_under_glock: + pagefault_disable(); + to->nofault = true; + ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, + IOMAP_DIO_PARTIAL, written); + to->nofault = false; + pagefault_enable(); + if (ret > 0) + written = ret; + + if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + size_t leftover; - ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0); - gfs2_glock_dq(gh); + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_writeable(to, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) + goto retry; + goto retry_under_glock; + } + } + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); - return ret; + if (ret < 0) + return ret; + return written; } static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, @@ -805,11 +877,21 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - size_t len = iov_iter_count(from); - loff_t offset = iocb->ki_pos; + size_t prev_count = 0, window_size = 0; + size_t read = 0; ssize_t ret; /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + * + * For writes, iomap_dio_rw only triggers manual page faults, so we + * don't need to disable physical ones. + */ + + /* * Deferred lock, even if its a write, since we do no allocation on * this path. All we need to change is the atime, and this lock mode * ensures that other nodes have flushed their buffered read caches @@ -818,31 +900,62 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, * VFS does. */ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, gh); +retry: ret = gfs2_glock_nq(gh); if (ret) goto out_uninit; - +retry_under_glock: /* Silently fall back to buffered I/O when writing beyond EOF */ - if (offset + len > i_size_read(&ip->i_inode)) + if (iocb->ki_pos + iov_iter_count(from) > i_size_read(&ip->i_inode)) goto out; - ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0); + from->nofault = true; + ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, + IOMAP_DIO_PARTIAL, read); + from->nofault = false; + if (ret == -ENOTBLK) ret = 0; + if (ret > 0) + read = ret; + + if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) + goto retry; + goto retry_under_glock; + } + } out: - gfs2_glock_dq(gh); + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); out_uninit: gfs2_holder_uninit(gh); - return ret; + if (ret < 0) + return ret; + return read; } static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct gfs2_inode *ip; struct gfs2_holder gh; + size_t prev_count = 0, window_size = 0; size_t written = 0; ssize_t ret; + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + */ + if (iocb->ki_flags & IOCB_DIRECT) { ret = gfs2_file_direct_read(iocb, to, &gh); if (likely(ret != -ENOTBLK)) @@ -864,18 +977,118 @@ static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) } ip = GFS2_I(iocb->ki_filp->f_mapping->host); gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); +retry: ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; +retry_under_glock: + pagefault_disable(); ret = generic_file_read_iter(iocb, to); + pagefault_enable(); if (ret > 0) written += ret; - gfs2_glock_dq(&gh); + + if (should_fault_in_pages(ret, to, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(&gh); + leftover = fault_in_iov_iter_writeable(to, window_size); + gfs2_holder_disallow_demote(&gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(&gh)) { + if (written) + goto out_uninit; + goto retry; + } + goto retry_under_glock; + } + } + if (gfs2_holder_queued(&gh)) + gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); return written ? written : ret; } +static ssize_t gfs2_file_buffered_write(struct kiocb *iocb, + struct iov_iter *from, + struct gfs2_holder *gh) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_holder *statfs_gh = NULL; + size_t prev_count = 0, window_size = 0; + size_t read = 0; + ssize_t ret; + + /* + * In this function, we disable page faults when we're holding the + * inode glock while doing I/O. If a page fault occurs, we indicate + * that the inode glock may be dropped, fault in the pages manually, + * and retry. + */ + + if (inode == sdp->sd_rindex) { + statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS); + if (!statfs_gh) + return -ENOMEM; + } + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh); +retry: + ret = gfs2_glock_nq(gh); + if (ret) + goto out_uninit; +retry_under_glock: + if (inode == sdp->sd_rindex) { + struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); + + ret = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, statfs_gh); + if (ret) + goto out_unlock; + } + + current->backing_dev_info = inode_to_bdi(inode); + pagefault_disable(); + ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); + pagefault_enable(); + current->backing_dev_info = NULL; + if (ret > 0) { + iocb->ki_pos += ret; + read += ret; + } + + if (inode == sdp->sd_rindex) + gfs2_glock_dq_uninit(statfs_gh); + + if (should_fault_in_pages(ret, from, &prev_count, &window_size)) { + size_t leftover; + + gfs2_holder_allow_demote(gh); + leftover = fault_in_iov_iter_readable(from, window_size); + gfs2_holder_disallow_demote(gh); + if (leftover != window_size) { + if (!gfs2_holder_queued(gh)) { + if (read) + goto out_uninit; + goto retry; + } + goto retry_under_glock; + } + } +out_unlock: + if (gfs2_holder_queued(gh)) + gfs2_glock_dq(gh); +out_uninit: + gfs2_holder_uninit(gh); + if (statfs_gh) + kfree(statfs_gh); + return read ? read : ret; +} + /** * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context @@ -927,9 +1140,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out_unlock; iocb->ki_flags |= IOCB_DSYNC; - current->backing_dev_info = inode_to_bdi(inode); - buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - current->backing_dev_info = NULL; + buffered = gfs2_file_buffered_write(iocb, from, &gh); if (unlikely(buffered <= 0)) { if (!ret) ret = buffered; @@ -943,7 +1154,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * the direct I/O range as we don't know if the buffered pages * made it to disk. */ - iocb->ki_pos += buffered; ret2 = generic_write_sync(iocb, buffered); invalidate_mapping_pages(mapping, (iocb->ki_pos - buffered) >> PAGE_SHIFT, @@ -951,13 +1161,9 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!ret || ret2 > 0) ret += ret2; } else { - current->backing_dev_info = inode_to_bdi(inode); - ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); - current->backing_dev_info = NULL; - if (likely(ret > 0)) { - iocb->ki_pos += ret; + ret = gfs2_file_buffered_write(iocb, from, &gh); + if (likely(ret > 0)) ret = generic_write_sync(iocb, ret); - } } out_unlock: diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e0eaa9cf9fb6..f686083d0250 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -58,6 +58,7 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); +static void __gfs2_glock_dq(struct gfs2_holder *gh); static struct dentry *gfs2_root; static struct workqueue_struct *glock_workqueue; @@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl) if (gl->gl_state == LM_ST_UNLOCKED) return 0; + /* + * Note that demote_ok is used for the lru process of disposing of + * glocks. For this purpose, we don't care if the glock's holders + * have the HIF_MAY_DEMOTE flag set or not. If someone is using + * them, don't demote. + */ if (!list_empty(&gl->gl_holders)) return 0; if (glops->go_demote_ok) @@ -301,46 +308,59 @@ void gfs2_glock_put(struct gfs2_glock *gl) } /** - * may_grant - check if its ok to grant a new lock + * may_grant - check if it's ok to grant a new lock * @gl: The glock + * @current_gh: One of the current holders of @gl * @gh: The lock request which we wish to grant * - * Returns: true if its ok to grant the lock + * With our current compatibility rules, if a glock has one or more active + * holders (HIF_HOLDER flag set), any of those holders can be passed in as + * @current_gh; they are all the same as far as compatibility with the new @gh + * goes. + * + * Returns true if it's ok to grant the lock. */ -static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh) -{ - const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list); +static inline bool may_grant(struct gfs2_glock *gl, + struct gfs2_holder *current_gh, + struct gfs2_holder *gh) +{ + if (current_gh) { + GLOCK_BUG_ON(gl, !test_bit(HIF_HOLDER, ¤t_gh->gh_iflags)); + + switch(current_gh->gh_state) { + case LM_ST_EXCLUSIVE: + /* + * Here we make a special exception to grant holders + * who agree to share the EX lock with other holders + * who also have the bit set. If the original holder + * has the LM_FLAG_NODE_SCOPE bit set, we grant more + * holders with the bit set. + */ + return gh->gh_state == LM_ST_EXCLUSIVE && + (current_gh->gh_flags & LM_FLAG_NODE_SCOPE) && + (gh->gh_flags & LM_FLAG_NODE_SCOPE); - if (gh != gh_head) { - /** - * Here we make a special exception to grant holders who agree - * to share the EX lock with other holders who also have the - * bit set. If the original holder has the LM_FLAG_NODE_SCOPE bit - * is set, we grant more holders with the bit set. - */ - if (gh_head->gh_state == LM_ST_EXCLUSIVE && - (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) && - gh->gh_state == LM_ST_EXCLUSIVE && - (gh->gh_flags & LM_FLAG_NODE_SCOPE)) - return 1; - if ((gh->gh_state == LM_ST_EXCLUSIVE || - gh_head->gh_state == LM_ST_EXCLUSIVE)) - return 0; + case LM_ST_SHARED: + case LM_ST_DEFERRED: + return gh->gh_state == current_gh->gh_state; + + default: + return false; + } } + if (gl->gl_state == gh->gh_state) - return 1; + return true; if (gh->gh_flags & GL_EXACT) - return 0; + return false; if (gl->gl_state == LM_ST_EXCLUSIVE) { - if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED) - return 1; - if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED) - return 1; + return gh->gh_state == LM_ST_SHARED || + gh->gh_state == LM_ST_DEFERRED; } - if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY)) - return 1; - return 0; + if (gh->gh_flags & LM_FLAG_ANY) + return gl->gl_state != LM_ST_UNLOCKED; + return false; } static void gfs2_holder_wake(struct gfs2_holder *gh) @@ -366,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret) struct gfs2_holder *gh, *tmp; list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + if (!test_bit(HIF_WAIT, &gh->gh_iflags)) continue; if (ret & LM_OUT_ERROR) gh->gh_error = -EIO; @@ -381,6 +401,78 @@ static void do_error(struct gfs2_glock *gl, const int ret) } /** + * demote_incompat_holders - demote incompatible demoteable holders + * @gl: the glock we want to promote + * @new_gh: the new holder to be promoted + */ +static void demote_incompat_holders(struct gfs2_glock *gl, + struct gfs2_holder *new_gh) +{ + struct gfs2_holder *gh; + + /* + * Demote incompatible holders before we make ourselves eligible. + * (This holder may or may not allow auto-demoting, but we don't want + * to demote the new holder before it's even granted.) + */ + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + /* + * Since holders are at the front of the list, we stop when we + * find the first non-holder. + */ + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + return; + if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) && + !may_grant(gl, new_gh, gh)) { + /* + * We should not recurse into do_promote because + * __gfs2_glock_dq only calls handle_callback, + * gfs2_glock_add_to_lru and __gfs2_glock_queue_work. + */ + __gfs2_glock_dq(gh); + } + } +} + +/** + * find_first_holder - find the first "holder" gh + * @gl: the glock + */ + +static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl) +{ + struct gfs2_holder *gh; + + if (!list_empty(&gl->gl_holders)) { + gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, + gh_list); + if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + return gh; + } + return NULL; +} + +/** + * find_first_strong_holder - find the first non-demoteable holder + * @gl: the glock + * + * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set. + */ +static inline struct gfs2_holder * +find_first_strong_holder(struct gfs2_glock *gl) +{ + struct gfs2_holder *gh; + + list_for_each_entry(gh, &gl->gl_holders, gh_list) { + if (!test_bit(HIF_HOLDER, &gh->gh_iflags)) + return NULL; + if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags)) + return gh; + } + return NULL; +} + +/** * do_promote - promote as many requests as possible on the current queue * @gl: The glock * @@ -393,14 +485,21 @@ __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; - struct gfs2_holder *gh, *tmp; + struct gfs2_holder *gh, *tmp, *first_gh; + bool incompat_holders_demoted = false; int ret; restart: + first_gh = find_first_strong_holder(gl); list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) { - if (test_bit(HIF_HOLDER, &gh->gh_iflags)) + if (!test_bit(HIF_WAIT, &gh- |
