summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 10:17:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 10:17:14 -0700
commitde16588a7737b12e63ec646d72b45befb2b1f8f7 (patch)
tree719d3271039eca6ca315201e0cd882658133a2f8
parentecd7db20474c3859d4d01f34aaabf41bd28c7d84 (diff)
parente6fa4c728fb671765291cca3a905986612c06b6e (diff)
downloadlinux-de16588a7737b12e63ec646d72b45befb2b1f8f7.tar.gz
linux-de16588a7737b12e63ec646d72b45befb2b1f8f7.tar.bz2
linux-de16588a7737b12e63ec646d72b45befb2b1f8f7.zip
Merge tag 'v6.6-vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "This contains the usual miscellaneous features, cleanups, and fixes for vfs and individual filesystems. Features: - Block mode changes on symlinks and rectify our broken semantics - Report file modifications via fsnotify() for splice - Allow specifying an explicit timeout for the "rootwait" kernel command line option. This allows to timeout and reboot instead of always waiting indefinitely for the root device to show up - Use synchronous fput for the close system call Cleanups: - Get rid of open-coded lockdep workarounds for async io submitters and replace it all with a single consolidated helper - Simplify epoll allocation helper - Convert simple_write_begin and simple_write_end to use a folio - Convert page_cache_pipe_buf_confirm() to use a folio - Simplify __range_close to avoid pointless locking - Disable per-cpu buffer head cache for isolated cpus - Port ecryptfs to kmap_local_page() api - Remove redundant initialization of pointer buf in pipe code - Unexport the d_genocide() function which is only used within core vfs - Replace printk(KERN_ERR) and WARN_ON() with WARN() Fixes: - Fix various kernel-doc issues - Fix refcount underflow for eventfds when used as EFD_SEMAPHORE - Fix a mainly theoretical issue in devpts - Check the return value of __getblk() in reiserfs - Fix a racy assert in i_readcount_dec - Fix integer conversion issues in various functions - Fix LSM security context handling during automounts that prevented NFS superblock sharing" * tag 'v6.6-vfs.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (39 commits) cachefiles: use kiocb_{start,end}_write() helpers ovl: use kiocb_{start,end}_write() helpers aio: use kiocb_{start,end}_write() helpers io_uring: use kiocb_{start,end}_write() helpers fs: create kiocb_{start,end}_write() helpers fs: add kerneldoc to file_{start,end}_write() helpers io_uring: rename kiocb_end_write() local helper splice: Convert page_cache_pipe_buf_confirm() to use a folio libfs: Convert simple_write_begin and simple_write_end to use a folio fs/dcache: Replace printk and WARN_ON by WARN fs/pipe: remove redundant initialization of pointer buf fs: Fix kernel-doc warnings devpts: Fix kernel-doc warnings doc: idmappings: fix an error and rephrase a paragraph init: Add support for rootwait timeout parameter vfs: fix up the assert in i_readcount_dec fs: Fix one kernel-doc comment docs: filesystems: idmappings: clarify from where idmappings are taken fs/buffer.c: disable per-CPU buffer_head cache for isolated CPUs vfs, security: Fix automount superblock LSM init problem, preventing NFS sb sharing ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--Documentation/filesystems/idmappings.rst14
-rw-r--r--fs/aio.c20
-rw-r--r--fs/attr.c20
-rw-r--r--fs/buffer.c7
-rw-r--r--fs/cachefiles/io.c16
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/devpts/inode.c10
-rw-r--r--fs/ecryptfs/crypto.c8
-rw-r--r--fs/ecryptfs/mmap.c5
-rw-r--r--fs/ecryptfs/read_write.c12
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/fcntl.c29
-rw-r--r--fs/file.c30
-rw-r--r--fs/file_table.c5
-rw-r--r--fs/fs_context.c35
-rw-r--r--fs/ioctl.c10
-rw-r--r--fs/kernel_read_file.c12
-rw-r--r--fs/libfs.c42
-rw-r--r--fs/locks.c20
-rw-r--r--fs/namei.c5
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/notify/dnotify/dnotify.c4
-rw-r--r--fs/open.c31
-rw-r--r--fs/overlayfs/file.c10
-rw-r--r--fs/pipe.c8
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/smb/client/cifsfs.c2
-rw-r--r--fs/splice.c64
-rw-r--r--include/linux/dnotify.h4
-rw-r--r--include/linux/filelock.h12
-rw-r--r--include/linux/fs.h60
-rw-r--r--include/linux/lsm_hook_defs.h1
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/security.h6
-rw-r--r--init/do_mounts.c38
-rw-r--r--io_uring/rw.c33
-rw-r--r--security/security.c14
-rw-r--r--security/selinux/hooks.c22
-rw-r--r--security/smack/smack_lsm.c51
44 files changed, 458 insertions, 243 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 722b6eca2e93..6ff63638ed82 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5522,6 +5522,10 @@
Useful for devices that are detected asynchronously
(e.g. USB and MMC devices).
+ rootwait= [KNL] Maximum time (in seconds) to wait for root device
+ to show up before attempting to mount the root
+ filesystem.
+
rproc_mem=nn[KMG][@address]
[KNL,ARM,CMA] Remoteproc physical memory block.
Memory area to be used by remote processor image,
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index ad6d21640576..d095c5838f94 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -146,9 +146,10 @@ For the rest of this document we will prefix all userspace ids with ``u`` and
all kernel ids with ``k``. Ranges of idmappings will be prefixed with ``r``. So
an idmapping will be written as ``u0:k10000:r10000``.
-For example, the id ``u1000`` is an id in the upper idmapset or "userspace
-idmapset" starting with ``u1000``. And it is mapped to ``k11000`` which is a
-kernel id in the lower idmapset or "kernel idmapset" starting with ``k10000``.
+For example, within this idmapping, the id ``u1000`` is an id in the upper
+idmapset or "userspace idmapset" starting with ``u0``. And it is mapped to
+``k11000`` which is a kernel id in the lower idmapset or "kernel idmapset"
+starting with ``k10000``.
A kernel id is always created by an idmapping. Such idmappings are associated
with user namespaces. Since we mainly care about how idmappings work we're not
@@ -373,6 +374,13 @@ kernel maps the caller's userspace id down into a kernel id according to the
caller's idmapping and then maps that kernel id up according to the
filesystem's idmapping.
+From the implementation point it's worth mentioning how idmappings are represented.
+All idmappings are taken from the corresponding user namespace.
+
+ - caller's idmapping (usually taken from ``current_user_ns()``)
+ - filesystem's idmapping (``sb->s_user_ns``)
+ - mount's idmapping (``mnt_idmap(vfsmnt)``)
+
Let's see some examples with caller/filesystem idmapping but without mount
idmappings. This will exhibit some problems we can hit. After that we will
revisit/reconsider these examples, this time using mount idmappings, to see how
diff --git a/fs/aio.c b/fs/aio.c
index 77e33619de40..b3174da80ff6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1447,13 +1447,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
if (kiocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(kiocb->ki_filp);
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
+ kiocb_end_write(kiocb);
}
iocb->ki_res.res = res;
@@ -1581,17 +1576,8 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- /*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * aio_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
- */
- if (S_ISREG(file_inode(file)->i_mode)) {
- sb_start_write(file_inode(file)->i_sb);
- __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- }
+ if (S_ISREG(file_inode(file)->i_mode))
+ kiocb_start_write(req);
req->ki_flags |= IOCB_WRITE;
aio_rw_done(req, call_write_iter(file, req, &iter));
}
diff --git a/fs/attr.c b/fs/attr.c
index 599f6d14c0ed..a8ae5f6d9b16 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -394,9 +394,25 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
return error;
if ((ia_valid & ATTR_MODE)) {
- umode_t amode = attr->ia_mode;
+ /*
+ * Don't allow changing the mode of symlinks:
+ *
+ * (1) The vfs doesn't take the mode of symlinks into account
+ * during permission checking.
+ * (2) This has never worked correctly. Most major filesystems
+ * did return EOPNOTSUPP due to interactions with POSIX ACLs
+ * but did still updated the mode of the symlink.
+ * This inconsistency led system call wrapper providers such
+ * as libc to block changing the mode of symlinks with
+ * EOPNOTSUPP already.
+ * (3) To even do this in the first place one would have to use
+ * specific file descriptors and quite some effort.
+ */
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
/* Flag setting protected by i_mutex */
- if (is_sxid(amode))
+ if (is_sxid(attr->ia_mode))
inode->i_flags &= ~S_NOSEC;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..3aa2380a75e1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,6 +49,7 @@
#include <trace/events/block.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
+#include <linux/sched/isolation.h>
#include "internal.h"
@@ -1352,7 +1353,7 @@ static void bh_lru_install(struct buffer_head *bh)
* failing page migration.
* Skip putting upcoming bh into bh_lru until migration is done.
*/
- if (lru_cache_disabled()) {
+ if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
bh_lru_unlock();
return;
}
@@ -1382,6 +1383,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
check_irqs_on();
bh_lru_lock();
+ if (cpu_is_isolated(smp_processor_id())) {
+ bh_lru_unlock();
+ return NULL;
+ }
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 175a25fcade8..009d23cd435b 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret);
- /* Tell lockdep we inherited freeze protection from submission thread */
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_end_write(iocb);
if (ret < 0)
trace_cachefiles_io_error(object, inode, ret,
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
{
struct cachefiles_cache *cache;
struct cachefiles_kiocb *ki;
- struct inode *inode;
unsigned int old_nofs;
ssize_t ret;
size_t len = iov_iter_count(iter);
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
- /* Open-code file_start_write here to grab freeze protection, which
- * will be released by another thread in aio_complete_rw(). Fool
- * lockdep by telling it the lock got released so that it doesn't
- * complain about the held lock when we return to userspace.
- */
- inode = file_inode(file);
- __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
- __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_start_write(&ki->iocb);
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
- trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+ trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
old_nofs = memalloc_nofs_save();
ret = cachefiles_inject_write_error();
if (ret == 0)
diff --git a/fs/dcache.c b/fs/dcache.c
index 52e6d5fdab6b..25ac74d30bff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1664,7 +1664,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
if (dentry == _data && dentry->d_lockref.count == 1)
return D_WALK_CONTINUE;
- printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
+ WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
" still in use (%d) [unmount of %s %s]\n",
dentry,
dentry->d_inode ?
@@ -1673,7 +1673,6 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
dentry->d_lockref.count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
- WARN_ON(1);
return D_WALK_CONTINUE;
}
@@ -3247,8 +3246,6 @@ void d_genocide(struct dentry *parent)
d_walk(parent, parent, d_genocide_kill);
}
-EXPORT_SYMBOL(d_genocide);
-
void d_tmpfile(struct file *file, struct inode *inode)
{
struct dentry *dentry = file->f_path.dentry;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5ede89880911..299c295a27a0 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -534,12 +534,12 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
/**
* devpts_pty_new -- create a new inode in /dev/pts/
- * @ptmx_inode: inode of the master
- * @device: major+minor of the node to be created
+ * @fsi: Filesystem info for this instance.
* @index: used as a name of the node
* @priv: what's given back by devpts_get_priv
*
- * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ * The dentry for the created inode is returned.
+ * Remove it from /dev/pts/ with devpts_pty_kill().
*/
struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
@@ -580,7 +580,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
/**
* devpts_get_priv -- get private data for a slave
- * @pts_inode: inode of the slave
+ * @dentry: dentry of the slave
*
* Returns whatever was passed as priv in devpts_pty_new for a given inode.
*/
@@ -593,7 +593,7 @@ void *devpts_get_priv(struct dentry *dentry)
/**
* devpts_pty_kill -- remove inode form /dev/pts/
- * @inode: inode of the slave to be removed
+ * @dentry: dentry of the slave to be removed
*
* This is an inverse operation of devpts_pty_new.
*/
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c16f0d660cb7..03bd55069d86 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -441,10 +441,10 @@ int ecryptfs_encrypt_page(struct page *page)
}
lower_offset = lower_offset_for_page(crypt_stat, page);
- enc_extent_virt = kmap(enc_extent_page);
+ enc_extent_virt = kmap_local_page(enc_extent_page);
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
PAGE_SIZE);
- kunmap(enc_extent_page);
+ kunmap_local(enc_extent_virt);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to write lower page; rc = [%d]\n",
@@ -490,10 +490,10 @@ int ecryptfs_decrypt_page(struct page *page)
BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
lower_offset = lower_offset_for_page(crypt_stat, page);
- page_virt = kmap(page);
+ page_virt = kmap_local_page(page);
rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
ecryptfs_inode);
- kunmap(page);
+ kunmap_local(page_virt);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to read lower page; rc = [%d]\n",
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 373c3e5747e6..e2483acc4366 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -125,7 +125,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
/* This is a header extent */
char *page_virt;
- page_virt = kmap_atomic(page);
+ page_virt = kmap_local_page(page);
memset(page_virt, 0, PAGE_SIZE);
/* TODO: Support more than one header extent */
if (view_extent_num == 0) {
@@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
crypt_stat,
&written);
}
- kunmap_atomic(page_virt);
+ kunmap_local(page_virt);
flush_dcache_page(page);
if (rc) {
printk(KERN_ERR "%s: Error reading xattr "
@@ -255,7 +255,6 @@ out:
* @mapping: The eCryptfs object
* @pos: The file offset at which to start writing
* @len: Length of the write
- * @flags: Various flags
* @pagep: Pointer to return the page
* @fsdata: Pointer to return fs data (unused)
*
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 60bdcaddcbe5..3458f153a588 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
+ offset_in_page);
- virt = kmap(page_for_lower);
+ virt = kmap_local_page(page_for_lower);
rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
if (rc > 0)
rc = 0;
- kunmap(page_for_lower);
+ kunmap_local(virt);
return rc;
}
@@ -140,7 +140,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
ecryptfs_page_idx, rc);
goto out;
}
- ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
+ ecryptfs_page_virt = kmap_local_page(ecryptfs_page);
/*
* pos: where we're now writing, offset: where the request was
@@ -163,7 +163,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
(data + data_offset), num_bytes);
data_offset += num_bytes;
}
- kunmap_atomic(ecryptfs_page_virt);
+ kunmap_local(ecryptfs_page_virt);
flush_dcache_page(ecryptfs_page);
SetPageUptodate(ecryptfs_page);
unlock_page(ecryptfs_page);
@@ -253,11 +253,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
int rc;
offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
- virt = kmap(page_for_ecryptfs);
+ virt = kmap_local_page(page_for_ecryptfs);
rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
if (rc > 0)
rc = 0;
- kunmap(page_for_ecryptfs);
+ kunmap_local(virt);
flush_dcache_page(page_for_ecryptfs);
return rc;
}
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8aa36cd37351..33a918f9566c 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
lockdep_assert_held(&ctx->wqh.lock);
- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
ctx->count -= *cnt;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4b1b3362f697..1d9a71a0c4c1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -975,15 +975,11 @@ again:
static int ep_alloc(struct eventpoll **pep)
{
- int error;
- struct user_struct *user;
struct eventpoll *ep;
- user = get_current_user();
- error = -ENOMEM;
ep = kzalloc(sizeof(*ep), GFP_KERNEL);
if (unlikely(!ep))
- goto free_uid;
+ return -ENOMEM;
mutex_init(&ep->mtx);
rwlock_init(&ep->lock);
@@ -992,16 +988,12 @@ static int ep_alloc(struct eventpoll **pep)
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
- ep->user = user;
+ ep->user = get_current_user();
refcount_set(&ep->refcount, 1);
*pep = ep;
return 0;
-
-free_uid:
- free_uid(user);
- return error;
}
/*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index b622be119706..e871009f6c88 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -34,7 +34,7 @@
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
-static int setfl(int fd, struct file * filp, unsigned long arg)
+static int setfl(int fd, struct file * filp, unsigned int arg)
{
struct inode * inode = file_inode(filp);
int error = 0;
@@ -112,11 +112,11 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
}
EXPORT_SYMBOL(__f_setown);
-int f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, int who, int force)
{
enum pid_type type;
struct pid *pid = NULL;
- int who = arg, ret = 0;
+ int ret = 0;
type = PIDTYPE_TGID;
if (who < 0) {
@@ -317,28 +317,29 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
void __user *argp = (void __user *)arg;
+ int argi = (int)arg;
struct flock flock;
long err = -EINVAL;
switch (cmd) {
case F_DUPFD:
- err = f_dupfd(arg, filp, 0);
+ err = f_dupfd(argi, filp, 0);
break;
case F_DUPFD_CLOEXEC:
- err = f_dupfd(arg, filp, O_CLOEXEC);
+ err = f_dupfd(argi, filp, O_CLOEXEC);
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
break;
case F_SETFD:
err = 0;
- set_close_on_exec(fd, arg & FD_CLOEXEC);
+ set_close_on_exec(fd, argi & FD_CLOEXEC);
break;
case F_GETFL:
err = filp->f_flags;
break;
case F_SETFL:
- err = setfl(fd, filp, arg);
+ err = setfl(fd, filp, argi);
break;
#if BITS_PER_LONG != 32
/* 32-bit arches must use fcntl64() */
@@ -375,7 +376,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
force_successful_syscall_return();
break;
case F_SETOWN:
- err = f_setown(filp, arg, 1);
+ err = f_setown(filp, argi, 1);
break;
case F_GETOWN_EX:
err = f_getown_ex(filp, arg);
@@ -391,28 +392,28 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
break;
case F_SETSIG:
/* arg == 0 restores default behaviour. */
- if (!valid_signal(arg)) {
+ if (!valid_signal(argi)) {
break;
}
err = 0;
- filp->f_owner.signum = arg;
+ filp->f_owner.signum = argi;
break;
case F_GETLEASE:
err = fcntl_getlease(filp);
break;
case F_SETLEASE:
- err = fcntl_setlease(fd, filp, arg);
+ err = fcntl_setlease(fd, filp, argi);
break;
case F_NOTIFY:
- err = fcntl_dirnotify(fd, filp, arg);
+ err = fcntl_dirnotify(fd, filp, argi);
break;
case F_SETPIPE_SZ:
case F_GETPIPE_SZ:
- err = pipe_fcntl(filp, cmd, arg);
+ err = pipe_fcntl(filp, cmd, argi);
break;
case F_ADD_SEALS:
case F_GET_SEALS:
- err = memfd_fcntl(filp, cmd, arg);
+ err = memfd_fcntl(filp, cmd, argi);
break;
case F_GET_RW_HINT:
case F_SET_RW_HINT:
diff --git a/fs/file.c b/fs/file.c
index 3fd003a8604f..3e4a4dfa38fc 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
/**
* last_fd - return last valid index into fd table
- * @cur_fds: files struct
+ * @fdt: File descriptor table.
*
* Context: Either rcu read lock or files_lock must be held.
*
@@ -693,29 +693,30 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
spin_unlock(&cur_fds->file_lock);
}
-static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+static inline void __range_close(struct files_struct *files, unsigned int fd,
unsigned int max_fd)
{
+ struct file *file;
unsigned n;
- rcu_read_lock();
- n = last_fd(files_fdtable(cur_fds));
- rcu_read_unlock();
+ spin_lock(&files->file_lock);
+ n = last_fd(files_fdtable(files));
max_fd = min(max_fd, n);
- while (fd <= max_fd) {
- struct file *file;
-
- spin_lock(&cur_fds->file_lock);
- file = pick_file(cur_fds, fd++);
- spin_unlock(&cur_fds->file_lock);
-
+ for (; fd <= max_fd; fd++) {
+ file = pick_file(files, fd);
if (file) {
- /* found a valid file to close */
- filp_close(file, cur_fds);
+ spin_unlock(&files->file_lock);
+ filp_close(file, files);
cond_resched();
+ spin_lock(&files->file_lock);
+ } else if (need_resched()) {
+ spin_unlock(&files->file_lock);
+ cond_resched();
+ spin_lock(&files->file_lock);
}
}
+ spin_unlock(&files->file_lock);
}
/**
@@ -723,6 +724,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
*
* @fd: starting file descriptor to close
* @max_fd: last file descriptor to close
+ * @flags: CLOSE_RANGE flags.
*
* This closes a range of file descriptors. All file descriptors
* from @fd up to and including @max_fd are closed.
diff --git a/fs/file_table.c b/fs/file_table.c
index fc7d677ff5ad..ee21b3da9d08 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -461,11 +461,8 @@ void fput(struct file *file)
*/
void __fput_sync(struct file *file)
{
- if (atomic_long_dec_and_test(&file->f_count)) {
- struct task_struct *task = current;
- BUG_ON(!(task->flags & PF_KTHREAD));
+ if (atomic_long_dec_and_test(&file->f_count))
__fput(file);
- }
}
EXPORT_SYMBOL(fput);
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 30d82d2979af..a0ad7a0c4680 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
/**
* vfs_parse_fs_string - Convenience function to just parse a string.
+ * @fc: Filesystem context.
+ * @key: Parameter name.
+ * @value: Default value.
+ * @v_size: Maximum number of bytes in the value.
*/
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
const char *value, size_t v_size)
@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string);
/**
* generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
- * @ctx: The superblock configuration to fill in.
+ * @fc: The superblock configuration to fill in.
* @data: The data to parse
*
* Parse a blob of data that's in key[=val][,key[=val]]* form. This can be
@@ -315,10 +319,31 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
}
EXPORT_SYMBOL(fs_context_for_reconfigure);
+/**
+ * fs_context_for_submount: allocate a new fs_context for a submount
+ * @type: file_system_type of the new context
+ * @reference: reference dentry from which to copy relevant info
+ *
+ * Allocate a new fs_context s