summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/filesystems/fiemap.rst | 49
-rw-r--r-- drivers/virt/vboxguest/Kconfig | 2
-rw-r--r-- fs/dcache.c | 3
-rw-r--r-- fs/erofs/zdata.c | 3
-rw-r--r-- fs/ext4/inode.c | 3
-rw-r--r-- fs/ext4/namei.c | 4
-rw-r--r-- fs/file.c | 22
-rw-r--r-- fs/file_table.c | 7
-rw-r--r-- fs/fs_context.c | 2
-rw-r--r-- fs/gfs2/quota.c | 3
-rw-r--r-- fs/namei.c | 34
-rw-r--r-- fs/namespace.c | 2
-rw-r--r-- fs/pipe.c | 19
-rw-r--r-- fs/pnode.c | 8
-rw-r--r-- fs/proc/array.c | 2
-rw-r--r-- fs/proc/namespaces.c | 2
-rw-r--r-- fs/select.c | 4
-rw-r--r-- fs/vboxsf/Kconfig | 2
-rw-r--r-- include/linux/fiemap.h | 16
-rw-r--r-- include/linux/fs.h | 29
-rw-r--r-- include/linux/lockref.h | 26
-rw-r--r-- include/linux/mount.h | 4
-rw-r--r-- include/linux/seqlock.h | 2
-rw-r--r-- include/uapi/linux/fiemap.h | 47
-rw-r--r-- include/uapi/linux/fs.h | 6
-rw-r--r-- kernel/watch_queue.c | 4
-rw-r--r-- lib/lockref.c | 60
-rw-r--r-- mm/shmem.c | 6
-rw-r--r-- security/apparmor/apparmorfs.c | 2
-rw-r--r-- tools/testing/selftests/coredump/Makefile | 7
-rw-r--r-- tools/testing/selftests/coredump/README.rst | 50
-rwxr-xr-x tools/testing/selftests/coredump/stackdump | 14
-rw-r--r-- tools/testing/selftests/coredump/stackdump_test.c | 151
33 files changed, 415 insertions, 180 deletions
diff --git a/Documentation/filesystems/fiemap.rst b/Documentation/filesystems/fiemap.rst
index 93fc96f760aa..23b3ed229e49 100644
--- a/Documentation/filesystems/fiemap.rst
+++ b/Documentation/filesystems/fiemap.rst
@@ -12,21 +12,10 @@ returns a list of extents.
Request Basics
--------------
-A fiemap request is encoded within struct fiemap::
-
- struct fiemap {
- __u64 fm_start; /* logical offset (inclusive) at
- * which to start mapping (in) */
- __u64 fm_length; /* logical length of mapping which
- * userspace cares about (in) */
- __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
- __u32 fm_mapped_extents; /* number of extents that were
- * mapped (out) */
- __u32 fm_extent_count; /* size of fm_extents array (in) */
- __u32 fm_reserved;
- struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
- };
+A fiemap request is encoded within struct fiemap:
+.. kernel-doc:: include/uapi/linux/fiemap.h
+ :identifiers: fiemap
fm_start, and fm_length specify the logical range within the file
which the process would like mappings for. Extents returned mirror
@@ -60,6 +49,8 @@ FIEMAP_FLAG_XATTR
If this flag is set, the extents returned will describe the inodes
extended attribute lookup tree, instead of its data tree.
+FIEMAP_FLAG_CACHE
+ This flag requests caching of the extents.
Extent Mapping
--------------
@@ -77,18 +68,10 @@ complete the requested range and will not have the FIEMAP_EXTENT_LAST
flag set (see the next section on extent flags).
Each extent is described by a single fiemap_extent structure as
-returned in fm_extents::
-
- struct fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent */
- __u64 fe_length; /* length in bytes for the extent */
- __u64 fe_reserved64[2];
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
- __u32 fe_reserved[3];
- };
+returned in fm_extents:
+
+.. kernel-doc:: include/uapi/linux/fiemap.h
+ :identifiers: fiemap_extent
All offsets and lengths are in bytes and mirror those on disk. It is valid
for an extents logical offset to start before the request or its logical
@@ -175,6 +158,8 @@ FIEMAP_EXTENT_MERGED
userspace would be highly inefficient, the kernel will try to merge most
adjacent blocks into 'extents'.
+FIEMAP_EXTENT_SHARED
+ This flag is set to request that space be shared with other files.
VFS -> File System Implementation
---------------------------------
@@ -191,14 +176,10 @@ each discovered extent::
u64 len);
->fiemap is passed struct fiemap_extent_info which describes the
-fiemap request::
-
- struct fiemap_extent_info {
- unsigned int fi_flags; /* Flags as passed from user */
- unsigned int fi_extents_mapped; /* Number of mapped extents */
- unsigned int fi_extents_max; /* Size of fiemap_extent array */
- struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
- };
+fiemap request:
+
+.. kernel-doc:: include/linux/fiemap.h
+ :identifiers: fiemap_extent_info
It is intended that the file system should not need to access any of this
structure directly. Filesystem handlers should be tolerant to signals and return
diff --git a/drivers/virt/vboxguest/Kconfig b/drivers/virt/vboxguest/Kconfig
index cc329887bfae..11b153e7454e 100644
--- a/drivers/virt/vboxguest/Kconfig
+++ b/drivers/virt/vboxguest/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config VBOXGUEST
tristate "Virtual Box Guest integration support"
- depends on X86 && PCI && INPUT
+ depends on (ARM64 || X86) && PCI && INPUT
help
This is a driver for the Virtual Box Guest PCI device used in
Virtual Box virtual machines. Enabling this driver will add
diff --git a/fs/dcache.c b/fs/dcache.c
index b4d5e9e1e43d..1a01d7a6a7a9 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1681,9 +1681,8 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
/* Make sure we always see the terminating NUL character */
smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
- dentry->d_lockref.count = 1;
dentry->d_flags = 0;
- spin_lock_init(&dentry->d_lock);
+ lockref_init(&dentry->d_lockref, 1);
seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
dentry->d_inode = NULL;
dentry->d_parent = dentry;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 19ef4ff2a134..254f6ad2c336 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -747,8 +747,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
if (IS_ERR(pcl))
return PTR_ERR(pcl);
- spin_lock_init(&pcl->lockref.lock);
- pcl->lockref.count = 1; /* one ref for this request */
+ lockref_init(&pcl->lockref, 1); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89aade6f45f6..7c54ae5fcbd4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5006,10 +5006,11 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
if (IS_ENCRYPTED(inode)) {
inode->i_op = &ext4_encrypted_symlink_inode_operations;
} else if (ext4_inode_is_fast_symlink(inode)) {
- inode->i_link = (char *)ei->i_data;
inode->i_op = &ext4_fast_symlink_inode_operations;
nd_terminate_link(ei->i_data, inode->i_size,
sizeof(ei->i_data) - 1);
+ inode_set_cached_link(inode, (char *)ei->i_data,
+ inode->i_size);
} else {
inode->i_op = &ext4_symlink_inode_operations;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index bcf2737078b8..536d56d15072 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3418,7 +3418,6 @@ retry:
inode->i_op = &ext4_symlink_inode_operations;
} else {
inode->i_op = &ext4_fast_symlink_inode_operations;
- inode->i_link = (char *)&EXT4_I(inode)->i_data;
}
}
@@ -3434,6 +3433,9 @@ retry:
disk_link.len);
inode->i_size = disk_link.len - 1;
EXT4_I(inode)->i_disksize = inode->i_size;
+ if (!IS_ENCRYPTED(inode))
+ inode_set_cached_link(inode, (char *)&EXT4_I(inode)->i_data,
+ inode->i_size);
}
err = ext4_add_nondir(handle, dentry, &inode);
if (handle)
diff --git a/fs/file.c b/fs/file.c
index 25c6e53b03f8..d868cdb95d1e 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -279,10 +279,6 @@ repeat:
if (nr < fdt->max_fds)
return 0;
- /* Can we expand? */
- if (nr >= sysctl_nr_open)
- return -EMFILE;
-
if (unlikely(files->resize_in_progress)) {
spin_unlock(&files->file_lock);
wait_event(files->resize_wait, !files->resize_in_progress);
@@ -290,6 +286,10 @@ repeat:
goto repeat;
}
+ /* Can we expand? */
+ if (unlikely(nr >= sysctl_nr_open))
+ return -EMFILE;
+
/* All good, so we try */
files->resize_in_progress = true;
error = expand_fdtable(files, nr);
@@ -1231,17 +1231,9 @@ __releases(&files->file_lock)
/*
* We need to detect attempts to do dup2() over allocated but still
- * not finished descriptor. NB: OpenBSD avoids that at the price of
- * extra work in their equivalent of fget() - they insert struct
- * file immediately after grabbing descriptor, mark it larval if
- * more work (e.g. actual opening) is needed and make sure that
- * fget() treats larval files as absent. Potentially interesting,
- * but while extra work in fget() is trivial, locking implications
- * and amount of surgery on open()-related paths in VFS are not.
- * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
- * deadlocks in rather amusing ways, AFAICS. All of that is out of
- * scope of POSIX or SUS, since neither considers shared descriptor
- * tables and this condition does not arise without those.
+ * not finished descriptor.
+ *
+ * POSIX is silent on the issue, we return -EBUSY.
*/
fdt = files_fdtable(files);
fd = array_index_nospec(fd, fdt->max_fds);
diff --git a/fs/file_table.c b/fs/file_table.c
index 976736be47cb..a32171d2b83f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -128,7 +128,7 @@ static struct ctl_table fs_stat_sysctls[] = {
.data = &sysctl_nr_open,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_douintvec_minmax,
.extra1 = &sysctl_nr_open_min,
.extra2 = &sysctl_nr_open_max,
},
@@ -478,6 +478,8 @@ static void ____fput(struct callback_head *work)
__fput(container_of(work, struct file, f_task_work));
}
+static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
+
/*
* If kernel thread really needs to have the final fput() it has done
* to complete, call this. The only user right now is the boot - we
@@ -491,11 +493,10 @@ static void ____fput(struct callback_head *work)
void flush_delayed_fput(void)
{
delayed_fput(NULL);
+ flush_delayed_work(&delayed_fput_work);
}
EXPORT_SYMBOL_GPL(flush_delayed_fput);
-static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
-
void fput(struct file *file)
{
if (file_ref_put(&file->f_ref)) {
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 98589aae5208..582d33e81117 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -493,7 +493,7 @@ static void put_fc_log(struct fs_context *fc)
if (log) {
if (refcount_dec_and_test(&log->usage)) {
fc->log.log = NULL;
- for (i = 0; i <= 7; i++)
+ for (i = 0; i < ARRAY_SIZE(log->buffer) ; i++)
if (log->need_free & (1 << i))
kfree(log->buffer[i]);
kfree(log);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 72b48f6f5561..58bc5013ca49 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -236,8 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str
return NULL;
qd->qd_sbd = sdp;
- qd->qd_lockref.count = 0;
- spin_lock_init(&qd->qd_lockref.lock);
+ lockref_init(&qd->qd_lockref, 0);
qd->qd_id = qid;
qd->qd_slot = -1;
INIT_LIST_HEAD(&qd->qd_lru);
diff --git a/fs/namei.c b/fs/namei.c
index 9d30c7aa9aa6..e56c29a22d26 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -5272,19 +5272,16 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
getname(newname), 0);
}
-int readlink_copy(char __user *buffer, int buflen, const char *link)
+int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen)
{
- int len = PTR_ERR(link);
- if (IS_ERR(link))
- goto out;
+ int copylen;
- len = strlen(link);
- if (len > (unsigned) buflen)
- len = buflen;
- if (copy_to_user(buffer, link, len))
- len = -EFAULT;
-out:
- return len;
+ copylen = linklen;
+ if (unlikely(copylen > (unsigned) buflen))
+ copylen = buflen;
+ if (copy_to_user(buffer, link, copylen))
+ copylen = -EFAULT;
+ return copylen;
}
/**
@@ -5304,6 +5301,9 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
const char *link;
int res;
+ if (inode->i_opflags & IOP_CACHED_LINK)
+ return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen);
+
if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
if (unlikely(inode->i_op->readlink))
return inode->i_op->readlink(dentry, buffer, buflen);
@@ -5322,7 +5322,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
if (IS_ERR(link))
return PTR_ERR(link);
}
- res = readlink_copy(buffer, buflen, link);
+ res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}
@@ -5391,10 +5391,14 @@ EXPORT_SYMBOL(page_put_link);
int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
+ const char *link;
+ int res;
+
DEFINE_DELAYED_CALL(done);
- int res = readlink_copy(buffer, buflen,
- page_get_link(dentry, d_inode(dentry),
- &done));
+ link = page_get_link(dentry, d_inode(dentry), &done);
+ res = PTR_ERR(link);
+ if (!IS_ERR(link))
+ res = readlink_copy(buffer, buflen, link, strlen(link));
do_delayed_call(&done);
return res;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index eac057e56948..851af89e8d72 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3839,7 +3839,7 @@ int path_mount(const char *dev_name, struct path *path,
data_page);
}
-long do_mount(const char *dev_name, const char __user *dir_name,
+int do_mount(const char *dev_name, const char __user *dir_name,
const char *type_page, unsigned long flags, void *data_page)
{
struct path path;
diff --git a/fs/pipe.c b/fs/pipe.c
index 12b22c2723b7..82fede0f2111 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -253,7 +253,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
size_t total_len = iov_iter_count(to);
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
- bool was_full, wake_next_reader = false;
+ bool wake_writer = false, wake_next_reader = false;
ssize_t ret;
/* Null read succeeds. */
@@ -264,14 +264,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
mutex_lock(&pipe->mutex);
/*
- * We only wake up writers if the pipe was full when we started
- * reading in order to avoid unnecessary wakeups.
+ * We only wake up writers if the pipe was full when we started reading
+ * and it is no longer full after reading to avoid unnecessary wakeups.
*
* But when we do wake up writers, we do so using a sync wakeup
* (WF_SYNC), because we want them to get going and generate more
* data for us.
*/
- was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
for (;;) {
/* Read ->head with a barrier vs post_one_notification() */
unsigned int head = smp_load_acquire(&pipe->head);
@@ -340,8 +339,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
buf->len = 0;
}
- if (!buf->len)
+ if (!buf->len) {
+ wake_writer |= pipe_full(head, tail, pipe->max_usage);
tail = pipe_update_tail(pipe, buf, tail);
+ }
total_len -= chars;
if (!total_len)
break; /* common path: read succeeded */
@@ -377,7 +378,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
* _very_ unlikely case that the pipe was full, but we got
* no data.
*/
- if (unlikely(was_full))
+ if (unlikely(wake_writer))
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
@@ -390,15 +391,15 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
- mutex_lock(&pipe->mutex);
- was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
+ wake_writer = false;
wake_next_reader = true;
+ mutex_lock(&pipe->mutex);
}
if (pipe_empty(pipe->head, pipe->tail))
wake_next_reader = false;
mutex_unlock(&pipe->mutex);
- if (was_full)
+ if (wake_writer)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (wake_next_reader)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
diff --git a/fs/pnode.c b/fs/pnode.c
index a799e0315cc9..ef048f008bdd 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -611,10 +611,10 @@ int propagate_umount(struct list_head *list)
continue;
} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
/*
- * We have come accross an partially unmounted
- * mount in list that has not been visited yet.
- * Remember it has been visited and continue
- * about our merry way.
+ * We have come across a partially unmounted
+ * mount in a list that has not been visited
+ * yet. Remember it has been visited and
+ * continue about our merry way.
*/
list_add_tail(&child->mnt_umounting, &visited);
continue;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 55ed3510d2bb..d6a0369caa93 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -500,7 +500,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
* a program is not able to use ptrace(2) in that case. It is
* safe because the task has stopped executing permanently.
*/
- if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE))) {
+ if (permitted && (task->flags & (PF_EXITING|PF_DUMPCORE|PF_POSTCOREDUMP))) {
if (try_get_task_stack(task)) {
eip = KSTK_EIP(task);
esp = KSTK_ESP(task);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 8e159fc78c0a..c610224faf10 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -83,7 +83,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
res = ns_get_name(name, sizeof(name), task, ns_ops);
if (res >= 0)
- res = readlink_copy(buffer, buflen, name);
+ res = readlink_copy(buffer, buflen, name, strlen(name));
}
put_task_struct(task);
return res;
diff --git a/fs/select.c b/fs/select.c
index e223d1fe9d55..7da531b1cf6b 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -786,7 +786,7 @@ static inline int get_sigset_argpack(struct sigset_argpack *to,
}
return 0;
Efault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
@@ -1355,7 +1355,7 @@ static inline int get_compat_sigset_argpack(struct compat_sigset_argpack *to,
}
return 0;
Efault:
- user_access_end();
+ user_read_access_end();
return -EFAULT;
}
diff --git a/fs/vboxsf/Kconfig b/fs/vboxsf/Kconfig
index b84586ae08b3..d4694026db8b 100644
--- a/fs/vboxsf/Kconfig
+++ b/fs/vboxsf/Kconfig
@@ -1,6 +1,6 @@
config VBOXSF_FS
tristate "VirtualBox guest shared folder (vboxsf) support"
- depends on X86 && VBOXGUEST
+ depends on (ARM64 || X86) && VBOXGUEST
select NLS
help
VirtualBox hosts can share folders with guests, this driver
diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h
index c50882f19235..966092ffa89a 100644
--- a/include/linux/fiemap.h
+++ b/include/linux/fiemap.h
@@ -5,12 +5,18 @@
#include <uapi/linux/fiemap.h>
#include <linux/fs.h>
+/**
+ * struct fiemap_extent_info - fiemap request to a filesystem
+ * @fi_flags: Flags as passed from user
+ * @fi_extents_mapped: Number of mapped extents
+ * @fi_extents_max: Size of fiemap_extent array
+ * @fi_extents_start: Start of fiemap_extent array
+ */
struct fiemap_extent_info {
- unsigned int fi_flags; /* Flags as passed from user */
- unsigned int fi_extents_mapped; /* Number of mapped extents */
- unsigned int fi_extents_max; /* Size of fiemap_extent array */
- struct fiemap_extent __user *fi_extents_start; /* Start of
- fiemap_extent array */
+ unsigned int fi_flags;
+ unsigned int fi_extents_mapped;
+ unsigned int fi_extents_max;
+ struct fiemap_extent __user *fi_extents_start;
};
int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7e29433c5ecc..e06ea7e9ca15 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -322,6 +322,7 @@ struct readahead_control;
#define IOCB_NOWAIT (__force int) RWF_NOWAIT
#define IOCB_APPEND (__force int) RWF_APPEND
#define IOCB_ATOMIC (__force int) RWF_ATOMIC
+#define IOCB_DONTCACHE (__force int) RWF_DONTCACHE
/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD (1 << 16)
@@ -356,7 +357,8 @@ struct readahead_control;
{ IOCB_SYNC, "SYNC" }, \
{ IOCB_NOWAIT, "NOWAIT" }, \
{ IOCB_APPEND, "APPEND" }, \
- { IOCB_ATOMIC, "ATOMIC"}, \
+ { IOCB_ATOMIC, "ATOMIC" }, \
+ { IOCB_DONTCACHE, "DONTCACHE" }, \
{ IOCB_EVENTFD, "EVENTFD"}, \
{ IOCB_DIRECT, "DIRECT" }, \
{ IOCB_WRITE, "WRITE" }, \
@@ -626,6 +628,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
#define IOP_MGTIME 0x0020
+#define IOP_CACHED_LINK 0x0040
/*
* Keep mostly read-only and often accessed (especially for
@@ -723,7 +726,10 @@ struct inode {
};
struct file_lock_context *i_flctx;
struct address_space i_data;
- struct list_head i_devices;
+ union {
+ struct list_head i_devices;
+ int i_linklen;
+ };
union {
struct pipe_inode_info *i_pipe;
struct cdev *i_cdev;
@@ -749,6 +755,13 @@ struct inode {
void *i_private; /* fs or device private pointer */
} __randomize_layout;
+static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen)
+{
+ inode->i_link = link;
+ inode->i_linklen = linklen;
+ inode->i_opflags |= IOP_CACHED_LINK;
+}
+
/*
* Get bit address from inode->i_state to use with wait_var_event()
* infrastructre.
@@ -2127,6 +2140,8 @@ struct file_operations {
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
/* Supports asynchronous lock callbacks */
#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
+/* File system supports uncached read/write buffered IO */
+#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -3351,7 +3366,7 @@ extern const struct file_operations generic_ro_fops;
#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-extern int readlink_copy(char __user *, int, const char *);
+extern int readlink_copy(char __user *, int, const char *, int);
extern int page_readlink(struct dentry *, char __user *, int);
extern const char *page_get_link(struct dentry *, struct inode *,
struct delayed_call *);
@@ -3614,6 +3629,14 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
return -EOPNOTSUPP;
}
+ if (flags & RWF_DONTCACHE) {
+ /* file system must support it */
+ if (!(ki->ki_filp->f_op->fop_flags & FOP_DONTCACHE))
+ return -EOPNOTSUPP;
+ /* DAX mappings not supported */
+ if (IS_DAX(ki->ki_filp->f_mapping->host))
+ return -EOPNOTSUPP;
+ }
kiocb_flags |= (__force int) (flags & RWF_SUPPORTED);
if (flags & RWF_SYNC)
kiocb_flags |= IOCB_DSYNC;
diff --git a/include/linux/lockref.h b/include/linux/lockref.h
index c3a1f78bc884..c39f119659ba 100644
--- a/include/linux/lockref.h
+++ b/include/linux/lockref.h
@@ -34,14 +34,24 @@ struct lockref {
};
};
-extern void lockref_get(struct lockref *);
-extern int lockref_put_return(struct lockref *);
-extern int lockref_get_not_zero(struct lockref *);
-extern int lockref_put_not_zero(struct lockref *);
-extern int lockref_put_or_lock(struct lockref *);
-
-extern void lockref_mark_dead(struct lockref *);
-extern int lockref_get_not_dead(struct lockref *);
+/**
+ * lockref_init - Initialize a lockref
+ * @lockref: pointer to lockref structure
+ * @count: initial count
+ */
+static inline void lockref_init(struct lockref *lockref, unsigned int count)
+{
+ spin_lock_init(&lockref->lock);
+ lockref->count = count;
+}
+
+void lockref_get(struct lockref *lockref);
+int lockref_put_return(struct lockref *lockref);
+bool lockref_get_not_zero(struct lockref *lockref);
+bool lockref_put_or_lock(struct lockref *lockref);
+
+void lockref_mark_dead(struct lockref *lockref);
+bool lockref_get_not_dead(struct lockref *lockref);
/* Must be called under spinlock for reliable results */
static inline bool __lockref_is_dead(const struct lockref *l)
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 04213d8ef837..dcc17ce8a959 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -75,7 +75,7 @@ struct vfsmount {
static inline struct mnt_idmap *mnt_idmap(const struct vfsmount *mnt)
{
/* Pairs with smp_store_release() in do_idmap_mount(). */
- return smp_load_acquire(&mnt->mnt_idmap);
+ return READ_ONCE(mnt->mnt_idmap);
}
extern int mnt_want_write(struct vfsmount *mnt);
@@ -113,7 +113,7 @@ extern struct vfsmount *kern_mount(struct file_system_type *);
extern void kern_unmount(struct vfsmount *mnt);
extern int may_umount_tree(struct vfsmount *);
extern int may_umount(struct vfsmount *);
-extern long do_mount(const char *, const char __user *,
+int do_mount(const char *, const char __user *,
const char *, unsigned long, void *);
extern struct vfsmount *collect_mounts(const struct path *);
extern void drop_collected_mounts(struct vfsmount *);
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5298765d6ca4..eb20dcaa51b5 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -272,7 +272,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
({ \
unsigned __seq; \
\
- while ((__seq = seqprop_sequence(s)) & 1) \
+ while (unlikely((__seq = seqprop_sequence(s)) & 1)) \
cpu_relax(); \
\
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
diff --git a/include/uapi/linux/fiemap.h b/include/uapi/linux/fiemap.h
index 24ca0c00cae3..9d9e8ae32b41 100644
--- a/include/uapi/linux/fiemap.h
+++ b/include/uapi/linux/fiemap.h
@@ -14,37 +14,56 @@
#include <linux/types.h>
+/**
+ * struct fiemap_extent - description of one fiemap extent
+ * @fe_logical: byte offset of the extent in the file
+ * @fe_physical: byte offset of extent on disk
+ * @fe_length: length in bytes for this extent
+ * @fe_flags: FIEMAP_EXTENT_* flags for this extent
+ */
struct fiemap_extent {
- __u64 fe_logical; /* logical offset in bytes for the start of
- * the extent from the beginning of the file */
- __u64 fe_physical; /* physical offset in bytes for the start
- * of the extent from the beginning of the disk */
- __u64 fe_length; /* length in bytes for this extent */
+ __u64 fe_logical;
+ __u64 fe_physical;
+ __u64 fe_length;
+ /* private: */
__u64 fe_reserved64[2];
- __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */