diff options
| -rw-r--r-- | Documentation/filesystems/overlayfs.rst | 40 | ||||
| -rw-r--r-- | fs/overlayfs/Makefile | 2 | ||||
| -rw-r--r-- | fs/overlayfs/copy_up.c | 142 | ||||
| -rw-r--r-- | fs/overlayfs/dir.c | 64 | ||||
| -rw-r--r-- | fs/overlayfs/export.c | 7 | ||||
| -rw-r--r-- | fs/overlayfs/file.c | 88 | ||||
| -rw-r--r-- | fs/overlayfs/inode.c | 165 | ||||
| -rw-r--r-- | fs/overlayfs/namei.c | 52 | ||||
| -rw-r--r-- | fs/overlayfs/overlayfs.h | 72 | ||||
| -rw-r--r-- | fs/overlayfs/params.c | 322 | ||||
| -rw-r--r-- | fs/overlayfs/params.h | 1 | ||||
| -rw-r--r-- | fs/overlayfs/readdir.c | 27 | ||||
| -rw-r--r-- | fs/overlayfs/super.c | 92 | ||||
| -rw-r--r-- | fs/overlayfs/util.c | 115 | ||||
| -rw-r--r-- | fs/overlayfs/xattrs.c | 271 | ||||
| -rw-r--r-- | fs/super.c | 1 |
16 files changed, 929 insertions, 532 deletions
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 5b93268e400f..0407f361f32a 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -344,10 +344,11 @@ escaping the colons with a single backslash. For example: mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged -Since kernel version v6.5, directory names containing colons can also -be provided as lower layer using the fsconfig syscall from new mount api: +Since kernel version v6.8, directory names containing colons can also +be configured as lower layer using the "lowerdir+" mount options and the +fsconfig syscall from new mount api. For example: - fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0); In the latter case, colons in lower layer directory names will be escaped as an octal characters (\072) when displayed in /proc/self/mountinfo. @@ -416,6 +417,16 @@ Only the data of the files in the "data-only" lower layers may be visible when a "metacopy" file in one of the lower layers above it, has a "redirect" to the absolute path of the "lower data" file in the "data-only" lower layer. +Since kernel version v6.8, "data-only" lower layers can also be added using +the "datadir+" mount options and the fsconfig syscall from new mount api. +For example: + + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0); + fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0); + fs-verity support ---------------------- @@ -504,6 +515,29 @@ directory tree on the same or different underlying filesystem, and even to a different machine. With the "inodes index" feature, trying to mount the copied layers will fail the verification of the lower root file handle. +Nesting overlayfs mounts +------------------------ + +It is possible to use a lower directory that is stored on an overlayfs +mount. For regular files this does not need any special care. However, files +that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs will be +interpreted by the underlying overlayfs mount and stripped out. In order to +allow the second overlayfs mount to see the attributes they must be escaped. + +Overlayfs specific xattrs are escaped by using a special prefix of +"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr +in the lower dir will be exposed as a regular file with a +"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by +repeating the prefix multiple time, as each instance only removes one prefix. + +A lower dir with a regular whiteout will always be handled by the overlayfs +mount, so to support storing an effective whiteout file in an overlayfs mount an +alternative form of whiteout is supported. This form is a regular, zero-size +file with the "overlay.whiteout" xattr set, inside a directory with the +"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs, +but can be used by userspace tools (like containers) that generate lower layers. +These alternative whiteouts can be escaped using the standard xattr escape +mechanism in order to properly nest to any depth. Non-standard behavior --------------------- diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 4e173d56b11f..5648954f8588 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_OVERLAY_FS) += overlay.o overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \ - copy_up.o export.o params.o + copy_up.o export.o params.o xattrs.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ada3fcc9c6d5..4382881b0709 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, return PTR_ERR(old_file); /* Try to use clone_file_range to clone up within the same fs */ + ovl_start_write(dentry); cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0); + ovl_end_write(dentry); if (cloned == len) goto out_fput; /* Couldn't clone, so now we try to copy the data */ @@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, * it may not recognize all kind of holes and sometimes * only skips partial of hole area. However, it will be * enough for most of the use cases. + * + * We do not hold upper sb_writers throughout the loop to avert + * lockdep warning with llseek of lower file in nested overlay: + * - upper sb_writers + * -- lower ovl_inode_lock (ovl_llseek) */ - if (skip_hole && data_pos < old_pos) { data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA); if (data_pos > old_pos) { @@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry, } } + ovl_start_write(dentry); bytes = do_splice_direct(old_file, &old_pos, new_file, &new_pos, this_len, SPLICE_F_MOVE); + ovl_end_write(dentry); if (bytes <= 0) { error = bytes; break; @@ -426,29 +434,29 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, - struct dentry *upper) +struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) { - const struct ovl_fh *fh = NULL; - int err; - /* * When lower layer doesn't support export operations store a 'null' fh, * so we can use the overlay.origin xattr to distignuish between a copy * up and a pure upper inode. */ - if (ovl_can_decode_fh(lower->d_sb)) { - fh = ovl_encode_real_fh(ofs, lower, false); - if (IS_ERR(fh)) - return PTR_ERR(fh); - } + if (!ovl_can_decode_fh(origin->d_sb)) + return NULL; + + return ovl_encode_real_fh(ofs, origin, false); +} + +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh, + struct dentry *upper) +{ + int err; /* * Do not fail when upper doesn't support xattrs. */ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); - kfree(fh); /* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -476,7 +484,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper, * * Caller must hold i_mutex on indexdir. */ -static int ovl_create_index(struct dentry *dentry, struct dentry *origin, +static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -502,7 +510,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO; - err = ovl_get_index_name(ofs, origin, &name); + err = ovl_get_index_name_fh(fh, &name); if (err) return err; @@ -541,6 +549,7 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir; + const struct ovl_fh *origin_fh; bool origin; bool indexed; bool metacopy; @@ -555,14 +564,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); struct inode *udir = d_inode(upperdir); + ovl_start_write(c->dentry); + /* Mark parent "impure" because it may now contain non-pure upper */ err = ovl_set_impure(c->parent, upperdir); if (err) - return err; + goto out; err = ovl_set_nlink_lower(c->dentry); if (err) - return err; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir, @@ -581,10 +592,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) } inode_unlock(udir); if (err) - return err; + goto out; err = ovl_set_nlink_upper(c->dentry); +out: + ovl_end_write(c->dentry); return err; } @@ -637,7 +650,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. */ if (c->origin) { - err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); + err = ovl_set_origin_fh(ofs, c->origin_fh, temp); if (err) return err; } @@ -719,21 +732,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) .link = c->link }; - /* workdir and destdir could be the same when copying up to indexdir */ - err = -EIO; - if (lock_rename(c->workdir, c->destdir) != NULL) - goto unlock; - err = ovl_prep_cu_creds(c->dentry, &cc); if (err) - goto unlock; + return err; + ovl_start_write(c->dentry); + inode_lock(wdir); temp = ovl_create_temp(ofs, c->workdir, &cattr); + inode_unlock(wdir); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - err = PTR_ERR(temp); if (IS_ERR(temp)) - goto unlock; + return PTR_ERR(temp); /* * Copy up data first and then xattrs. Writing data after @@ -741,15 +752,28 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) */ path.dentry = temp; err = ovl_copy_up_data(c, &path); - if (err) + /* + * We cannot hold lock_rename() throughout this helper, because or + * lock ordering with sb_writers, which shouldn't be held when calling + * ovl_copy_up_data(), so lock workdir and destdir and make sure that + * temp wasn't moved before copy up completion or cleanup. + * If temp was moved, abort without the cleanup. + */ + ovl_start_write(c->dentry); + if (lock_rename(c->workdir, c->destdir) != NULL || + temp->d_parent != c->workdir) { + err = -EIO; + goto unlock; + } else if (err) { goto cleanup; + } err = ovl_copy_up_metadata(c, temp); if (err) goto cleanup; if (S_ISDIR(c->stat.mode) && c->indexed) { - err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp); + err = ovl_create_index(c->dentry, c->origin_fh, temp); if (err) goto cleanup; } @@ -779,6 +803,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) ovl_set_flag(OVL_WHITEOUTS, inode); unlock: unlock_rename(c->workdir, c->destdir); + ovl_end_write(c->dentry); return err; @@ -802,9 +827,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) if (err) return err; + ovl_start_write(c->dentry); tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode); + ovl_end_write(c->dentry); ovl_revert_cu_creds(&cc); - if (IS_ERR(tmpfile)) return PTR_ERR(tmpfile); @@ -815,9 +841,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) goto out_fput; } + ovl_start_write(c->dentry); + err = ovl_copy_up_metadata(c, temp); if (err) - goto out_fput; + goto out; inode_lock_nested(udir, I_MUTEX_PARENT); @@ -831,7 +859,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) inode_unlock(udir); if (err) - goto out_fput; + goto out; if (c->metacopy_digest) ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry)); @@ -843,6 +871,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c) ovl_set_upperdata(d_inode(c->dentry)); ovl_inode_update(d_inode(c->dentry), dget(temp)); +out: + ovl_end_write(c->dentry); out_fput: fput(tmpfile); return err; @@ -861,6 +891,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct dentry *origin = c->lowerpath.dentry; + struct ovl_fh *fh = NULL; bool to_index = false; /* @@ -877,25 +909,35 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; } - if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) + if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) { + fh = ovl_get_origin_fh(ofs, origin); + if (IS_ERR(fh)) + return PTR_ERR(fh); + + /* origin_fh may be NULL */ + c->origin_fh = fh; c->origin = true; + } if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb); - err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname); + err = ovl_get_index_name(ofs, origin, &c->destname); if (err) - return err; + goto out_free_fh; } else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */ - return -EIO; + err = -EIO; + goto out_free_fh; } else { /* * Mark parent "impure" because it may now contain non-pure * upper */ + ovl_start_write(c->dentry); err = ovl_set_impure(c->parent, c->destdir); + ovl_end_write(c->dentry); if (err) - return err; + goto out_free_fh; } /* Should we copyup with O_TMPFILE or with workdir? */ @@ -909,6 +951,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) if (c->indexed) ovl_set_flag(OVL_INDEX, d_inode(c->dentry)); + ovl_start_write(c->dentry); if (to_index) { /* Initialize nlink for copy up of disconnected dentry */ err = ovl_set_nlink_upper(c->dentry); @@ -923,10 +966,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry)); } + ovl_end_write(c->dentry); out: if (to_index) kfree(c->destname.name); +out_free_fh: + kfree(fh); return err; } @@ -1011,15 +1057,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c) * Writing to upper file will clear security.capability xattr. We * don't want that to happen for normal copy-up operation. */ + ovl_start_write(c->dentry); if (capability) { err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS, capability, cap_size, 0); - if (err) - goto out_free; } - - - err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY); + if (!err) { + err = ovl_removexattr(ofs, upperpath.dentry, + OVL_XATTR_METACOPY); + } + ovl_end_write(c->dentry); if (err) goto out_free; @@ -1170,17 +1217,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags) int ovl_maybe_copy_up(struct dentry *dentry, int flags) { - int err = 0; - - if (ovl_open_need_copy_up(dentry, flags)) { - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up_flags(dentry, flags); - ovl_drop_write(dentry); - } - } + if (!ovl_open_need_copy_up(dentry, flags)) + return 0; - return err; + return ovl_copy_up_flags(dentry, flags); } int ovl_copy_up_with_data(struct dentry *dentry) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 033fc0458a3d..aab3f5d93556 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -477,7 +477,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_unlock; err = -ESTALE; - if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper))) + if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper)) goto out_dput; newdentry = ovl_create_temp(ofs, workdir, cattr); @@ -559,10 +559,6 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, struct cred *override_cred; struct dentry *parent = dentry->d_parent; - err = ovl_copy_up(parent); - if (err) - return err; - old_cred = ovl_override_creds(dentry->d_sb); /* @@ -626,6 +622,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, .link = link, }; + err = ovl_copy_up(dentry->d_parent); + if (err) + return err; + err = ovl_want_write(dentry); if (err) goto out; @@ -700,28 +700,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir, int err; struct inode *inode; - err = ovl_want_write(old); + err = ovl_copy_up(old); if (err) goto out; - err = ovl_copy_up(old); + err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; - err = ovl_copy_up(new->d_parent); + err = ovl_nlink_start(old); if (err) - goto out_drop_write; + goto out; if (ovl_is_metacopy_dentry(old)) { err = ovl_set_link_redirect(old); if (err) - goto out_drop_write; + goto out_nlink_end; } - err = ovl_nlink_start(old); - if (err) - goto out_drop_write; - inode = d_inode(old); ihold(inode); @@ -731,9 +727,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir, if (err) iput(inode); +out_nlink_end: ovl_nlink_end(old); -out_drop_write: - ovl_drop_write(old); out: return err; } @@ -891,17 +886,13 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) goto out; } - err = ovl_want_write(dentry); - if (err) - goto out; - err = ovl_copy_up(dentry->d_parent); if (err) - goto out_drop_write; + goto out; err = ovl_nlink_start(dentry); if (err) - goto out_drop_write; + goto out; old_cred = ovl_override_creds(dentry->d_sb); if (!lower_positive) @@ -926,8 +917,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) if (ovl_dentry_upper(dentry)) ovl_copyattr(d_inode(dentry)); -out_drop_write: - ovl_drop_write(dentry); out: ovl_cache_free(&list); return err; @@ -1131,29 +1120,32 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } - err = ovl_want_write(old); - if (err) - goto out; - err = ovl_copy_up(old); if (err) - goto out_drop_write; + goto out; err = ovl_copy_up(new->d_parent); if (err) - goto out_drop_write; + goto out; if (!overwrite) { err = ovl_copy_up(new); if (err) - goto out_drop_write; + goto out; } else if (d_inode(new)) { err = ovl_nlink_start(new); if (err) - goto out_drop_write; + goto out; update_nlink = true; } + if (!update_nlink) { + /* ovl_nlink_start() took ovl_want_write() */ + err = ovl_want_write(old); + if (err) + goto out; + } + old_cred = ovl_override_creds(old->d_sb); if (!list_empty(&list)) { @@ -1219,7 +1211,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir, } } else { if (!d_is_negative(newdentry)) { - if (!new_opaque || !ovl_is_whiteout(newdentry)) + if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry)) goto out_dput; } else { if (flags & RENAME_EXCHANGE) @@ -1286,8 +1278,8 @@ out_revert_creds: revert_creds(old_cred); if (update_nlink) ovl_nlink_end(new); -out_drop_write: - ovl_drop_write(old); + else + ovl_drop_write(old); out: dput(opaquedir); ovl_cache_free(&list); diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 26b782c53910..7e16bbcad95e 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry) if (ovl_dentry_upper(dentry)) return 0; - err = ovl_want_write(dentry); - if (!err) { - err = ovl_copy_up(dentry); - ovl_drop_write(dentry); - } - + err = ovl_copy_up(dentry); if (err) { pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n", dentry, err); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index ec3671ca140c..131621daeb13 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -15,10 +15,15 @@ #include <linux/fs.h> #include "overlayfs.h" +#include "../internal.h" /* for sb_init_dio_done_wq */ + struct ovl_aio_req { struct kiocb iocb; refcount_t ref; struct kiocb *orig_iocb; + /* used for aio completion */ + struct work_struct work; + long res; }; static struct kmem_cache *ovl_aio_request_cachep; @@ -235,6 +240,12 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) return ret; } +static void ovl_file_modified(struct file *file) +{ + /* Update size/mtime */ + ovl_copyattr(file_inode(file)); +} + static void ovl_file_accessed(struct file *file) { struct inode *inode, *upperinode; @@ -263,20 +274,12 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } -static rwf_t ovl_iocb_to_rwf(int ifl) +#define OVL_IOCB_MASK \ + (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) + +static rwf_t iocb_to_rw_flags(int flags) { - rwf_t flags = 0; - - if (ifl & IOCB_NOWAIT) - flags |= RWF_NOWAIT; - if (ifl & IOCB_HIPRI) - flags |= RWF_HIPRI; - if (ifl & IOCB_DSYNC) - flags |= RWF_DSYNC; - if (ifl & IOCB_SYNC) - flags |= RWF_SYNC; - - return flags; + return (__force rwf_t)(flags & OVL_IOCB_MASK); } static inline void ovl_aio_put(struct ovl_aio_req *aio_req) @@ -293,10 +296,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) struct kiocb *orig_iocb = aio_req->orig_iocb; if (iocb->ki_flags & IOCB_WRITE) { - struct inode *inode = file_inode(orig_iocb->ki_filp); - kiocb_end_write(iocb); - ovl_copyattr(inode); + ovl_file_modified(orig_iocb->ki_filp); } orig_iocb->ki_pos = iocb->ki_pos; @@ -313,6 +314,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res) orig_iocb->ki_complete(orig_iocb, res); } +static void ovl_aio_complete_work(struct work_struct *work) +{ + struct ovl_aio_req *aio_req = container_of(work, + struct ovl_aio_req, work); + + ovl_aio_rw_complete(&aio_req->iocb, aio_req->res); +} + +static void ovl_aio_queue_completion(struct kiocb *iocb, long res) +{ + struct ovl_aio_req *aio_req = container_of(iocb, + struct ovl_aio_req, iocb); + struct kiocb *orig_iocb = aio_req->orig_iocb; + + /* + * Punt to a work queue to serialize updates of mtime/size. + */ + aio_req->res = res; + INIT_WORK(&aio_req->work, ovl_aio_complete_work); + queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq, + &aio_req->work); +} + +static int ovl_init_aio_done_wq(struct super_block *sb) +{ + if (sb->s_dio_done_wq) + return 0; + + return sb_init_dio_done_wq(sb); +} + static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; @@ -334,8 +366,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { - ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(iocb->ki_flags)); + rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags); + + ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf); } else { struct ovl_aio_req *aio_req; @@ -401,15 +434,20 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(ifl); + file_start_write(real.file); - ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, - ovl_iocb_to_rwf(ifl)); + ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf); file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); } else { struct ovl_aio_req *aio_req; + ret = ovl_init_aio_done_wq(inode->i_sb); + if (ret) + goto out; + ret = -ENOMEM; aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL); if (!aio_req) @@ -418,7 +456,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, get_file(real.file)); aio_req->iocb.ki_flags = ifl; - aio_req->iocb.ki_complete = ovl_aio_rw_complete; + aio_req->iocb.ki_complete = ovl_aio_queue_completion; refcount_set(&aio_req->ref, 2); kiocb_start_write(&aio_req->iocb); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); @@ -492,7 +530,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, file_end_write(real.file); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(out); revert_creds(old_cred); fdput(real); @@ -573,7 +611,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len revert_creds(old_cred); /* Update size */ - ovl_copyattr(inode); + ovl_file_modified(file); fdput(real); @@ -657,7 +695,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in, revert_creds(old_cred); /* Update size */ - ovl_copyattr(inode_out); + ovl_file_modified(file_out); fdput(real_in); fdput(real_out); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b6e98a7d36ce..345b8f161ca4 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -32,10 +32,6 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (err) return err; - err = ovl_want_write(dentry); - if (err) - goto out; - if (attr->ia_valid & ATTR_SIZE) { /* Truncate should trigger data copy up as well */ full_copy_up = true; @@ -54,7 +50,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, winode = d_inode(upperdentry); err = get_write_access(winode); if (err) - goto out_drop_write; + goto out; } if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -78,6 +74,10 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ attr->ia_valid &= ~ATTR_OPEN; + err = ovl_want_write(dentry); + if (err) + goto out_put_write; + inode_lock(upperdentry->d_inode); old_cred = ovl_override_creds(dentry->d_sb); err = ovl_do_notify_change(ofs, upperdentry, attr); @@ -85,12 +85,12 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (!err) ovl_copyattr(dentry->d_inode); inode_unlock(upperdentry->d_inode); + ovl_drop_write(dentry); +out_put_write: if (winode) put_write_access(winode); } -out_drop_write: - ovl_drop_write(dentry); out: return err; } @@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry, return p; } -bool ovl_is_private_xattr(struct super_block *sb, const char *name) -{ - struct ovl_fs *ofs = OVL_FS(sb); - - if (ofs->config.userxattr) - return strncmp(name, OVL_XATTR_USER_PREFIX, - sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; - else - return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, - sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; -} - -int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - int err; - struct ovl_fs *ofs = OVL_FS(dentry->d_sb); - struct dentry *upperdentry = ovl_i_dentry_upper(inode); - struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - struct path realpath; - const struct cred *old_cred; - - err = ovl_want_write(dentry); - if (err) - goto out; - - if (!value && !upperdentry) { - ovl_path_lower(dentry, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); - revert_creds(old_cred); - if (err < 0) - goto out_drop_write; - } - - if (!upperdentry) { - err = ovl_copy_up(dentry); - if (err) - goto out_drop_write; - - realdentry = ovl_dentry_upper(dentry); - } - - old_cred = ovl_override_creds(dentry->d_sb); - if (value) { - err = ovl_do_setxattr(ofs, realdentry, name, value, size, - flags); - } else { - WARN_ON(flags != XATTR_REPLACE); - err = ovl_do_removexattr(ofs, realdentry, name); - } - revert_creds(old_cred); - - /* copy c/mtime */ - ovl_copyattr(inode); - -out_drop_write: - ovl_drop_write(dentry); -out: - return err; -} - -int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, - void *value, size_t size) -{ - ssize_t res; - const struct cred *old_cred; - struct path realpath; - - ovl_i_path_real(inode, &realpath); - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); - revert_creds(old_cred); - return res; -} - -static bool ovl_can_list(struct super_block *sb, const char *s) -{ - /* Never list private (.overlay) */ - if (ovl_is_private_xattr(sb, s)) - return false; - - /* List all non-trusted xattrs */ - if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) - return true; - - /* list other trusted for superuser only */ - return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); -} - -ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) -{ - struct dentry *realdentry = ovl_dentry_real(dentry); - ssize_t res; - size_t len; - char *s; - const struct cred *old_cred; - - old_cred = ovl_override_creds(dentry->d_sb); - res = vfs_listxattr(realdentry, list, size); - revert_creds(old_cred); - if (res <= 0 || size == 0) - return res; - - /* filter out private xattrs */ - for (s = list, len = res; len;) { - size_t slen = strnlen(s, len) + 1; - - /* underlying fs providing us with an broken xattr list? */ - if (WARN_ON(slen > len)) - return -EIO; - - len -= slen; - if (!ovl_can_list(dentry->d_sb, s)) { - res -= slen; - memmove(s, s + slen, len); - } else { - s += slen; - } - } - - return res; -} - #ifdef CONFIG_FS_POSIX_ACL /* * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone @@ -611,10 +487,6 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, struct dentry *upperdentry = ovl_dentry_upper(dentry); struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); - err = ovl_want_write(dentry); - if (err) - return err; - /* * If ACL is to be removed from a lower file, check if it exists in * the first place before copying it up. @@ -630,7 +502,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, revert_creds(old_cred); if (IS_ERR(real_acl)) { err = PTR_ERR(real_acl); |
