summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/overlayfs.rst40
-rw-r--r--fs/overlayfs/Makefile2
-rw-r--r--fs/overlayfs/copy_up.c142
-rw-r--r--fs/overlayfs/dir.c64
-rw-r--r--fs/overlayfs/export.c7
-rw-r--r--fs/overlayfs/file.c88
-rw-r--r--fs/overlayfs/inode.c165
-rw-r--r--fs/overlayfs/namei.c52
-rw-r--r--fs/overlayfs/overlayfs.h72
-rw-r--r--fs/overlayfs/params.c322
-rw-r--r--fs/overlayfs/params.h1
-rw-r--r--fs/overlayfs/readdir.c27
-rw-r--r--fs/overlayfs/super.c92
-rw-r--r--fs/overlayfs/util.c115
-rw-r--r--fs/overlayfs/xattrs.c271
-rw-r--r--fs/super.c1
16 files changed, 929 insertions, 532 deletions
diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst
index 5b93268e400f..0407f361f32a 100644
--- a/Documentation/filesystems/overlayfs.rst
+++ b/Documentation/filesystems/overlayfs.rst
@@ -344,10 +344,11 @@ escaping the colons with a single backslash. For example:
mount -t overlay overlay -olowerdir=/a\:lower\:\:dir /merged
-Since kernel version v6.5, directory names containing colons can also
-be provided as lower layer using the fsconfig syscall from new mount api:
+Since kernel version v6.8, directory names containing colons can also
+be configured as lower layer using the "lowerdir+" mount options and the
+fsconfig syscall from new mount api. For example:
- fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir", "/a:lower::dir", 0);
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/a:lower::dir", 0);
In the latter case, colons in lower layer directory names will be escaped
as an octal characters (\072) when displayed in /proc/self/mountinfo.
@@ -416,6 +417,16 @@ Only the data of the files in the "data-only" lower layers may be visible
when a "metacopy" file in one of the lower layers above it, has a "redirect"
to the absolute path of the "lower data" file in the "data-only" lower layer.
+Since kernel version v6.8, "data-only" lower layers can also be added using
+the "datadir+" mount options and the fsconfig syscall from new mount api.
+For example:
+
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l1", 0);
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l2", 0);
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "lowerdir+", "/l3", 0);
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do1", 0);
+ fsconfig(fs_fd, FSCONFIG_SET_STRING, "datadir+", "/do2", 0);
+
fs-verity support
----------------------
@@ -504,6 +515,29 @@ directory tree on the same or different underlying filesystem, and even
to a different machine. With the "inodes index" feature, trying to mount
the copied layers will fail the verification of the lower root file handle.
+Nesting overlayfs mounts
+------------------------
+
+It is possible to use a lower directory that is stored on an overlayfs
+mount. For regular files this does not need any special care. However, files
+that have overlayfs attributes, such as whiteouts or "overlay.*" xattrs will be
+interpreted by the underlying overlayfs mount and stripped out. In order to
+allow the second overlayfs mount to see the attributes they must be escaped.
+
+Overlayfs specific xattrs are escaped by using a special prefix of
+"overlay.overlay.". So, a file with a "trusted.overlay.overlay.metacopy" xattr
+in the lower dir will be exposed as a regular file with a
+"trusted.overlay.metacopy" xattr in the overlayfs mount. This can be nested by
+repeating the prefix multiple time, as each instance only removes one prefix.
+
+A lower dir with a regular whiteout will always be handled by the overlayfs
+mount, so to support storing an effective whiteout file in an overlayfs mount an
+alternative form of whiteout is supported. This form is a regular, zero-size
+file with the "overlay.whiteout" xattr set, inside a directory with the
+"overlay.whiteouts" xattr set. Such whiteouts are never created by overlayfs,
+but can be used by userspace tools (like containers) that generate lower layers.
+These alternative whiteouts can be escaped using the standard xattr escape
+mechanism in order to properly nest to any depth.
Non-standard behavior
---------------------
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 4e173d56b11f..5648954f8588 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -6,4 +6,4 @@
obj-$(CONFIG_OVERLAY_FS) += overlay.o
overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
- copy_up.o export.o params.o
+ copy_up.o export.o params.o xattrs.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index ada3fcc9c6d5..4382881b0709 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -252,7 +252,9 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
return PTR_ERR(old_file);
/* Try to use clone_file_range to clone up within the same fs */
+ ovl_start_write(dentry);
cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
+ ovl_end_write(dentry);
if (cloned == len)
goto out_fput;
/* Couldn't clone, so now we try to copy the data */
@@ -287,8 +289,12 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
* it may not recognize all kind of holes and sometimes
* only skips partial of hole area. However, it will be
* enough for most of the use cases.
+ *
+ * We do not hold upper sb_writers throughout the loop to avert
+ * lockdep warning with llseek of lower file in nested overlay:
+ * - upper sb_writers
+ * -- lower ovl_inode_lock (ovl_llseek)
*/
-
if (skip_hole && data_pos < old_pos) {
data_pos = vfs_llseek(old_file, old_pos, SEEK_DATA);
if (data_pos > old_pos) {
@@ -303,9 +309,11 @@ static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
}
}
+ ovl_start_write(dentry);
bytes = do_splice_direct(old_file, &old_pos,
new_file, &new_pos,
this_len, SPLICE_F_MOVE);
+ ovl_end_write(dentry);
if (bytes <= 0) {
error = bytes;
break;
@@ -426,29 +434,29 @@ out_err:
return ERR_PTR(err);
}
-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
- struct dentry *upper)
+struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
{
- const struct ovl_fh *fh = NULL;
- int err;
-
/*
* When lower layer doesn't support export operations store a 'null' fh,
* so we can use the overlay.origin xattr to distignuish between a copy
* up and a pure upper inode.
*/
- if (ovl_can_decode_fh(lower->d_sb)) {
- fh = ovl_encode_real_fh(ofs, lower, false);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
- }
+ if (!ovl_can_decode_fh(origin->d_sb))
+ return NULL;
+
+ return ovl_encode_real_fh(ofs, origin, false);
+}
+
+int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
+ struct dentry *upper)
+{
+ int err;
/*
* Do not fail when upper doesn't support xattrs.
*/
err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf,
fh ? fh->fb.len : 0, 0);
- kfree(fh);
/* Ignore -EPERM from setting "user.*" on symlink/special */
return err == -EPERM ? 0 : err;
@@ -476,7 +484,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
*
* Caller must hold i_mutex on indexdir.
*/
-static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
+static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh,
struct dentry *upper)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
@@ -502,7 +510,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
- err = ovl_get_index_name(ofs, origin, &name);
+ err = ovl_get_index_name_fh(fh, &name);
if (err)
return err;
@@ -541,6 +549,7 @@ struct ovl_copy_up_ctx {
struct dentry *destdir;
struct qstr destname;
struct dentry *workdir;
+ const struct ovl_fh *origin_fh;
bool origin;
bool indexed;
bool metacopy;
@@ -555,14 +564,16 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
struct inode *udir = d_inode(upperdir);
+ ovl_start_write(c->dentry);
+
/* Mark parent "impure" because it may now contain non-pure upper */
err = ovl_set_impure(c->parent, upperdir);
if (err)
- return err;
+ goto out;
err = ovl_set_nlink_lower(c->dentry);
if (err)
- return err;
+ goto out;
inode_lock_nested(udir, I_MUTEX_PARENT);
upper = ovl_lookup_upper(ofs, c->dentry->d_name.name, upperdir,
@@ -581,10 +592,12 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
}
inode_unlock(udir);
if (err)
- return err;
+ goto out;
err = ovl_set_nlink_upper(c->dentry);
+out:
+ ovl_end_write(c->dentry);
return err;
}
@@ -637,7 +650,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
* hard link.
*/
if (c->origin) {
- err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
+ err = ovl_set_origin_fh(ofs, c->origin_fh, temp);
if (err)
return err;
}
@@ -719,21 +732,19 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
.link = c->link
};
- /* workdir and destdir could be the same when copying up to indexdir */
- err = -EIO;
- if (lock_rename(c->workdir, c->destdir) != NULL)
- goto unlock;
-
err = ovl_prep_cu_creds(c->dentry, &cc);
if (err)
- goto unlock;
+ return err;
+ ovl_start_write(c->dentry);
+ inode_lock(wdir);
temp = ovl_create_temp(ofs, c->workdir, &cattr);
+ inode_unlock(wdir);
+ ovl_end_write(c->dentry);
ovl_revert_cu_creds(&cc);
- err = PTR_ERR(temp);
if (IS_ERR(temp))
- goto unlock;
+ return PTR_ERR(temp);
/*
* Copy up data first and then xattrs. Writing data after
@@ -741,15 +752,28 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
*/
path.dentry = temp;
err = ovl_copy_up_data(c, &path);
- if (err)
+ /*
+ * We cannot hold lock_rename() throughout this helper, because or
+ * lock ordering with sb_writers, which shouldn't be held when calling
+ * ovl_copy_up_data(), so lock workdir and destdir and make sure that
+ * temp wasn't moved before copy up completion or cleanup.
+ * If temp was moved, abort without the cleanup.
+ */
+ ovl_start_write(c->dentry);
+ if (lock_rename(c->workdir, c->destdir) != NULL ||
+ temp->d_parent != c->workdir) {
+ err = -EIO;
+ goto unlock;
+ } else if (err) {
goto cleanup;
+ }
err = ovl_copy_up_metadata(c, temp);
if (err)
goto cleanup;
if (S_ISDIR(c->stat.mode) && c->indexed) {
- err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
+ err = ovl_create_index(c->dentry, c->origin_fh, temp);
if (err)
goto cleanup;
}
@@ -779,6 +803,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
unlock_rename(c->workdir, c->destdir);
+ ovl_end_write(c->dentry);
return err;
@@ -802,9 +827,10 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
if (err)
return err;
+ ovl_start_write(c->dentry);
tmpfile = ovl_do_tmpfile(ofs, c->workdir, c->stat.mode);
+ ovl_end_write(c->dentry);
ovl_revert_cu_creds(&cc);
-
if (IS_ERR(tmpfile))
return PTR_ERR(tmpfile);
@@ -815,9 +841,11 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
goto out_fput;
}
+ ovl_start_write(c->dentry);
+
err = ovl_copy_up_metadata(c, temp);
if (err)
- goto out_fput;
+ goto out;
inode_lock_nested(udir, I_MUTEX_PARENT);
@@ -831,7 +859,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
inode_unlock(udir);
if (err)
- goto out_fput;
+ goto out;
if (c->metacopy_digest)
ovl_set_flag(OVL_HAS_DIGEST, d_inode(c->dentry));
@@ -843,6 +871,8 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
ovl_set_upperdata(d_inode(c->dentry));
ovl_inode_update(d_inode(c->dentry), dget(temp));
+out:
+ ovl_end_write(c->dentry);
out_fput:
fput(tmpfile);
return err;
@@ -861,6 +891,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
+ struct dentry *origin = c->lowerpath.dentry;
+ struct ovl_fh *fh = NULL;
bool to_index = false;
/*
@@ -877,25 +909,35 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
to_index = true;
}
- if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
+ if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) {
+ fh = ovl_get_origin_fh(ofs, origin);
+ if (IS_ERR(fh))
+ return PTR_ERR(fh);
+
+ /* origin_fh may be NULL */
+ c->origin_fh = fh;
c->origin = true;
+ }
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
- err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
+ err = ovl_get_index_name(ofs, origin, &c->destname);
if (err)
- return err;
+ goto out_free_fh;
} else if (WARN_ON(!c->parent)) {
/* Disconnected dentry must be copied up to index dir */
- return -EIO;
+ err = -EIO;
+ goto out_free_fh;
} else {
/*
* Mark parent "impure" because it may now contain non-pure
* upper
*/
+ ovl_start_write(c->dentry);
err = ovl_set_impure(c->parent, c->destdir);
+ ovl_end_write(c->dentry);
if (err)
- return err;
+ goto out_free_fh;
}
/* Should we copyup with O_TMPFILE or with workdir? */
@@ -909,6 +951,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
if (c->indexed)
ovl_set_flag(OVL_INDEX, d_inode(c->dentry));
+ ovl_start_write(c->dentry);
if (to_index) {
/* Initialize nlink for copy up of disconnected dentry */
err = ovl_set_nlink_upper(c->dentry);
@@ -923,10 +966,13 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
ovl_dentry_set_upper_alias(c->dentry);
ovl_dentry_update_reval(c->dentry, ovl_dentry_upper(c->dentry));
}
+ ovl_end_write(c->dentry);
out:
if (to_index)
kfree(c->destname.name);
+out_free_fh:
+ kfree(fh);
return err;
}
@@ -1011,15 +1057,16 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
* Writing to upper file will clear security.capability xattr. We
* don't want that to happen for normal copy-up operation.
*/
+ ovl_start_write(c->dentry);
if (capability) {
err = ovl_do_setxattr(ofs, upperpath.dentry, XATTR_NAME_CAPS,
capability, cap_size, 0);
- if (err)
- goto out_free;
}
-
-
- err = ovl_removexattr(ofs, upperpath.dentry, OVL_XATTR_METACOPY);
+ if (!err) {
+ err = ovl_removexattr(ofs, upperpath.dentry,
+ OVL_XATTR_METACOPY);
+ }
+ ovl_end_write(c->dentry);
if (err)
goto out_free;
@@ -1170,17 +1217,10 @@ static bool ovl_open_need_copy_up(struct dentry *dentry, int flags)
int ovl_maybe_copy_up(struct dentry *dentry, int flags)
{
- int err = 0;
-
- if (ovl_open_need_copy_up(dentry, flags)) {
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up_flags(dentry, flags);
- ovl_drop_write(dentry);
- }
- }
+ if (!ovl_open_need_copy_up(dentry, flags))
+ return 0;
- return err;
+ return ovl_copy_up_flags(dentry, flags);
}
int ovl_copy_up_with_data(struct dentry *dentry)
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 033fc0458a3d..aab3f5d93556 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -477,7 +477,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
goto out_unlock;
err = -ESTALE;
- if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+ if (d_is_negative(upper) || !ovl_upper_is_whiteout(ofs, upper))
goto out_dput;
newdentry = ovl_create_temp(ofs, workdir, cattr);
@@ -559,10 +559,6 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
struct cred *override_cred;
struct dentry *parent = dentry->d_parent;
- err = ovl_copy_up(parent);
- if (err)
- return err;
-
old_cred = ovl_override_creds(dentry->d_sb);
/*
@@ -626,6 +622,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
.link = link,
};
+ err = ovl_copy_up(dentry->d_parent);
+ if (err)
+ return err;
+
err = ovl_want_write(dentry);
if (err)
goto out;
@@ -700,28 +700,24 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
int err;
struct inode *inode;
- err = ovl_want_write(old);
+ err = ovl_copy_up(old);
if (err)
goto out;
- err = ovl_copy_up(old);
+ err = ovl_copy_up(new->d_parent);
if (err)
- goto out_drop_write;
+ goto out;
- err = ovl_copy_up(new->d_parent);
+ err = ovl_nlink_start(old);
if (err)
- goto out_drop_write;
+ goto out;
if (ovl_is_metacopy_dentry(old)) {
err = ovl_set_link_redirect(old);
if (err)
- goto out_drop_write;
+ goto out_nlink_end;
}
- err = ovl_nlink_start(old);
- if (err)
- goto out_drop_write;
-
inode = d_inode(old);
ihold(inode);
@@ -731,9 +727,8 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
if (err)
iput(inode);
+out_nlink_end:
ovl_nlink_end(old);
-out_drop_write:
- ovl_drop_write(old);
out:
return err;
}
@@ -891,17 +886,13 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
goto out;
}
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
err = ovl_copy_up(dentry->d_parent);
if (err)
- goto out_drop_write;
+ goto out;
err = ovl_nlink_start(dentry);
if (err)
- goto out_drop_write;
+ goto out;
old_cred = ovl_override_creds(dentry->d_sb);
if (!lower_positive)
@@ -926,8 +917,6 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
if (ovl_dentry_upper(dentry))
ovl_copyattr(d_inode(dentry));
-out_drop_write:
- ovl_drop_write(dentry);
out:
ovl_cache_free(&list);
return err;
@@ -1131,29 +1120,32 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
}
- err = ovl_want_write(old);
- if (err)
- goto out;
-
err = ovl_copy_up(old);
if (err)
- goto out_drop_write;
+ goto out;
err = ovl_copy_up(new->d_parent);
if (err)
- goto out_drop_write;
+ goto out;
if (!overwrite) {
err = ovl_copy_up(new);
if (err)
- goto out_drop_write;
+ goto out;
} else if (d_inode(new)) {
err = ovl_nlink_start(new);
if (err)
- goto out_drop_write;
+ goto out;
update_nlink = true;
}
+ if (!update_nlink) {
+ /* ovl_nlink_start() took ovl_want_write() */
+ err = ovl_want_write(old);
+ if (err)
+ goto out;
+ }
+
old_cred = ovl_override_creds(old->d_sb);
if (!list_empty(&list)) {
@@ -1219,7 +1211,7 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
} else {
if (!d_is_negative(newdentry)) {
- if (!new_opaque || !ovl_is_whiteout(newdentry))
+ if (!new_opaque || !ovl_upper_is_whiteout(ofs, newdentry))
goto out_dput;
} else {
if (flags & RENAME_EXCHANGE)
@@ -1286,8 +1278,8 @@ out_revert_creds:
revert_creds(old_cred);
if (update_nlink)
ovl_nlink_end(new);
-out_drop_write:
- ovl_drop_write(old);
+ else
+ ovl_drop_write(old);
out:
dput(opaquedir);
ovl_cache_free(&list);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 26b782c53910..7e16bbcad95e 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -23,12 +23,7 @@ static int ovl_encode_maybe_copy_up(struct dentry *dentry)
if (ovl_dentry_upper(dentry))
return 0;
- err = ovl_want_write(dentry);
- if (!err) {
- err = ovl_copy_up(dentry);
- ovl_drop_write(dentry);
- }
-
+ err = ovl_copy_up(dentry);
if (err) {
pr_warn_ratelimited("failed to copy up on encode (%pd2, err=%i)\n",
dentry, err);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index ec3671ca140c..131621daeb13 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -15,10 +15,15 @@
#include <linux/fs.h>
#include "overlayfs.h"
+#include "../internal.h" /* for sb_init_dio_done_wq */
+
struct ovl_aio_req {
struct kiocb iocb;
refcount_t ref;
struct kiocb *orig_iocb;
+ /* used for aio completion */
+ struct work_struct work;
+ long res;
};
static struct kmem_cache *ovl_aio_request_cachep;
@@ -235,6 +240,12 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
return ret;
}
+static void ovl_file_modified(struct file *file)
+{
+ /* Update size/mtime */
+ ovl_copyattr(file_inode(file));
+}
+
static void ovl_file_accessed(struct file *file)
{
struct inode *inode, *upperinode;
@@ -263,20 +274,12 @@ static void ovl_file_accessed(struct file *file)
touch_atime(&file->f_path);
}
-static rwf_t ovl_iocb_to_rwf(int ifl)
+#define OVL_IOCB_MASK \
+ (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
+
+static rwf_t iocb_to_rw_flags(int flags)
{
- rwf_t flags = 0;
-
- if (ifl & IOCB_NOWAIT)
- flags |= RWF_NOWAIT;
- if (ifl & IOCB_HIPRI)
- flags |= RWF_HIPRI;
- if (ifl & IOCB_DSYNC)
- flags |= RWF_DSYNC;
- if (ifl & IOCB_SYNC)
- flags |= RWF_SYNC;
-
- return flags;
+ return (__force rwf_t)(flags & OVL_IOCB_MASK);
}
static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
@@ -293,10 +296,8 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
struct kiocb *orig_iocb = aio_req->orig_iocb;
if (iocb->ki_flags & IOCB_WRITE) {
- struct inode *inode = file_inode(orig_iocb->ki_filp);
-
kiocb_end_write(iocb);
- ovl_copyattr(inode);
+ ovl_file_modified(orig_iocb->ki_filp);
}
orig_iocb->ki_pos = iocb->ki_pos;
@@ -313,6 +314,37 @@ static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
orig_iocb->ki_complete(orig_iocb, res);
}
+static void ovl_aio_complete_work(struct work_struct *work)
+{
+ struct ovl_aio_req *aio_req = container_of(work,
+ struct ovl_aio_req, work);
+
+ ovl_aio_rw_complete(&aio_req->iocb, aio_req->res);
+}
+
+static void ovl_aio_queue_completion(struct kiocb *iocb, long res)
+{
+ struct ovl_aio_req *aio_req = container_of(iocb,
+ struct ovl_aio_req, iocb);
+ struct kiocb *orig_iocb = aio_req->orig_iocb;
+
+ /*
+ * Punt to a work queue to serialize updates of mtime/size.
+ */
+ aio_req->res = res;
+ INIT_WORK(&aio_req->work, ovl_aio_complete_work);
+ queue_work(file_inode(orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
+ &aio_req->work);
+}
+
+static int ovl_init_aio_done_wq(struct super_block *sb)
+{
+ if (sb->s_dio_done_wq)
+ return 0;
+
+ return sb_init_dio_done_wq(sb);
+}
+
static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
@@ -334,8 +366,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
- ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(iocb->ki_flags));
+ rwf_t rwf = iocb_to_rw_flags(iocb->ki_flags);
+
+ ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, rwf);
} else {
struct ovl_aio_req *aio_req;
@@ -401,15 +434,20 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
old_cred = ovl_override_creds(file_inode(file)->i_sb);
if (is_sync_kiocb(iocb)) {
+ rwf_t rwf = iocb_to_rw_flags(ifl);
+
file_start_write(real.file);
- ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
- ovl_iocb_to_rwf(ifl));
+ ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, rwf);
file_end_write(real.file);
/* Update size */
- ovl_copyattr(inode);
+ ovl_file_modified(file);
} else {
struct ovl_aio_req *aio_req;
+ ret = ovl_init_aio_done_wq(inode->i_sb);
+ if (ret)
+ goto out;
+
ret = -ENOMEM;
aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
if (!aio_req)
@@ -418,7 +456,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->orig_iocb = iocb;
kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
aio_req->iocb.ki_flags = ifl;
- aio_req->iocb.ki_complete = ovl_aio_rw_complete;
+ aio_req->iocb.ki_complete = ovl_aio_queue_completion;
refcount_set(&aio_req->ref, 2);
kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
@@ -492,7 +530,7 @@ static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
file_end_write(real.file);
/* Update size */
- ovl_copyattr(inode);
+ ovl_file_modified(out);
revert_creds(old_cred);
fdput(real);
@@ -573,7 +611,7 @@ static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len
revert_creds(old_cred);
/* Update size */
- ovl_copyattr(inode);
+ ovl_file_modified(file);
fdput(real);
@@ -657,7 +695,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
revert_creds(old_cred);
/* Update size */
- ovl_copyattr(inode_out);
+ ovl_file_modified(file_out);
fdput(real_in);
fdput(real_out);
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b6e98a7d36ce..345b8f161ca4 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -32,10 +32,6 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (err)
return err;
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
if (attr->ia_valid & ATTR_SIZE) {
/* Truncate should trigger data copy up as well */
full_copy_up = true;
@@ -54,7 +50,7 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
winode = d_inode(upperdentry);
err = get_write_access(winode);
if (err)
- goto out_drop_write;
+ goto out;
}
if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -78,6 +74,10 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
*/
attr->ia_valid &= ~ATTR_OPEN;
+ err = ovl_want_write(dentry);
+ if (err)
+ goto out_put_write;
+
inode_lock(upperdentry->d_inode);
old_cred = ovl_override_creds(dentry->d_sb);
err = ovl_do_notify_change(ofs, upperdentry, attr);
@@ -85,12 +85,12 @@ int ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (!err)
ovl_copyattr(dentry->d_inode);
inode_unlock(upperdentry->d_inode);
+ ovl_drop_write(dentry);
+out_put_write:
if (winode)
put_write_access(winode);
}
-out_drop_write:
- ovl_drop_write(dentry);
out:
return err;
}
@@ -339,130 +339,6 @@ static const char *ovl_get_link(struct dentry *dentry,
return p;
}
-bool ovl_is_private_xattr(struct super_block *sb, const char *name)
-{
- struct ovl_fs *ofs = OVL_FS(sb);
-
- if (ofs->config.userxattr)
- return strncmp(name, OVL_XATTR_USER_PREFIX,
- sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
- else
- return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
- sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
-}
-
-int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- int err;
- struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
- struct dentry *upperdentry = ovl_i_dentry_upper(inode);
- struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
- struct path realpath;
- const struct cred *old_cred;
-
- err = ovl_want_write(dentry);
- if (err)
- goto out;
-
- if (!value && !upperdentry) {
- ovl_path_lower(dentry, &realpath);
- old_cred = ovl_override_creds(dentry->d_sb);
- err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0);
- revert_creds(old_cred);
- if (err < 0)
- goto out_drop_write;
- }
-
- if (!upperdentry) {
- err = ovl_copy_up(dentry);
- if (err)
- goto out_drop_write;
-
- realdentry = ovl_dentry_upper(dentry);
- }
-
- old_cred = ovl_override_creds(dentry->d_sb);
- if (value) {
- err = ovl_do_setxattr(ofs, realdentry, name, value, size,
- flags);
- } else {
- WARN_ON(flags != XATTR_REPLACE);
- err = ovl_do_removexattr(ofs, realdentry, name);
- }
- revert_creds(old_cred);
-
- /* copy c/mtime */
- ovl_copyattr(inode);
-
-out_drop_write:
- ovl_drop_write(dentry);
-out:
- return err;
-}
-
-int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
- void *value, size_t size)
-{
- ssize_t res;
- const struct cred *old_cred;
- struct path realpath;
-
- ovl_i_path_real(inode, &realpath);
- old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size);
- revert_creds(old_cred);
- return res;
-}
-
-static bool ovl_can_list(struct super_block *sb, const char *s)
-{
- /* Never list private (.overlay) */
- if (ovl_is_private_xattr(sb, s))
- return false;
-
- /* List all non-trusted xattrs */
- if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0)
- return true;
-
- /* list other trusted for superuser only */
- return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
-}
-
-ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
-{
- struct dentry *realdentry = ovl_dentry_real(dentry);
- ssize_t res;
- size_t len;
- char *s;
- const struct cred *old_cred;
-
- old_cred = ovl_override_creds(dentry->d_sb);
- res = vfs_listxattr(realdentry, list, size);
- revert_creds(old_cred);
- if (res <= 0 || size == 0)
- return res;
-
- /* filter out private xattrs */
- for (s = list, len = res; len;) {
- size_t slen = strnlen(s, len) + 1;
-
- /* underlying fs providing us with an broken xattr list? */
- if (WARN_ON(slen > len))
- return -EIO;
-
- len -= slen;
- if (!ovl_can_list(dentry->d_sb, s)) {
- res -= slen;
- memmove(s, s + slen, len);
- } else {
- s += slen;
- }
- }
-
- return res;
-}
-
#ifdef CONFIG_FS_POSIX_ACL
/*
* Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone
@@ -611,10 +487,6 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
struct dentry *upperdentry = ovl_dentry_upper(dentry);
struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
- err = ovl_want_write(dentry);
- if (err)
- return err;
-
/*
* If ACL is to be removed from a lower file, check if it exists in
* the first place before copying it up.
@@ -630,7 +502,7 @@ static int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode,
revert_creds(old_cred);
if (IS_ERR(real_acl)) {
err = PTR_ERR(real_acl);