summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/binfmt_elf_fdpic.c4
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/exec.c8
-rw-r--r--fs/ext4/file.c3
-rw-r--r--fs/ext4/super.c3
-rw-r--r--fs/fcntl.c4
-rw-r--r--fs/notify/fanotify/fanotify.c31
-rw-r--r--fs/notify/fanotify/fanotify.h15
-rw-r--r--fs/notify/fanotify/fanotify_user.c150
-rw-r--r--fs/notify/fsnotify.c83
-rw-r--r--fs/open.c62
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--include/linux/fanotify.h18
-rw-r--r--include/linux/fs.h72
-rw-r--r--include/linux/fsnotify.h78
-rw-r--r--include/linux/fsnotify_backend.h53
-rw-r--r--include/linux/mm.h1
-rw-r--r--include/uapi/asm-generic/fcntl.h1
-rw-r--r--include/uapi/linux/fanotify.h18
-rw-r--r--kernel/fork.c12
-rw-r--r--mm/filemap.c86
-rw-r--r--mm/memory.c19
-rw-r--r--mm/nommu.c7
-rw-r--r--mm/readahead.c14
-rw-r--r--security/selinux/hooks.c3
28 files changed, 669 insertions, 106 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 106f0e8af177..8054f44d39cf 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1257,7 +1257,7 @@ out_free_interp:
}
reloc_func_desc = interp_load_addr;
- allow_write_access(interpreter);
+ exe_file_allow_write_access(interpreter);
fput(interpreter);
kfree(interp_elf_ex);
@@ -1354,7 +1354,7 @@ out_free_dentry:
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
out_free_file:
- allow_write_access(interpreter);
+ exe_file_allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_ph:
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index f1a7c4875c4a..c13ee8180b17 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -394,7 +394,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
goto error;
}
- allow_write_access(interpreter);
+ exe_file_allow_write_access(interpreter);
fput(interpreter);
interpreter = NULL;
}
@@ -467,7 +467,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
error:
if (interpreter) {
- allow_write_access(interpreter);
+ exe_file_allow_write_access(interpreter);
fput(interpreter);
}
kfree(interpreter_name);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ae98269a5e3a..6c18bad53cd3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2544,6 +2544,15 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
+ /*
+ * Don't allow defrag on pre-content watched files, as it could
+ * populate the page cache with 0's via readahead.
+ */
+ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
if (argp) {
if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f809c3200c21..dc4fee519ca6 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -961,7 +961,7 @@ static int btrfs_fill_super(struct super_block *sb,
#endif
sb->s_xattr = btrfs_xattr_handlers;
sb->s_time_gran = 1;
- sb->s_iflags |= SB_I_CGROUPWB;
+ sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
err = super_setup_bdi(sb);
if (err) {
diff --git a/fs/exec.c b/fs/exec.c
index d58b061c5e42..18f25c23b09f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -913,7 +913,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
path_noexec(&file->f_path))
return ERR_PTR(-EACCES);
- err = deny_write_access(file);
+ err = exe_file_deny_write_access(file);
if (err)
return ERR_PTR(err);
@@ -928,7 +928,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
* Returns ERR_PTR on failure or allocated struct file on success.
*
* As this is a wrapper for the internal do_open_execat(), callers
- * must call allow_write_access() before fput() on release. Also see
+ * must call exe_file_allow_write_access() before fput() on release. Also see
* do_close_execat().
*/
struct file *open_exec(const char *name)
@@ -1493,7 +1493,7 @@ static void do_close_execat(struct file *file)
{
if (!file)
return;
- allow_write_access(file);
+ exe_file_allow_write_access(file);
fput(file);
}
@@ -1822,7 +1822,7 @@ static int exec_binprm(struct linux_binprm *bprm)
bprm->file = bprm->interpreter;
bprm->interpreter = NULL;
- allow_write_access(exec);
+ exe_file_allow_write_access(exec);
if (unlikely(bprm->have_execfd)) {
if (bprm->executable) {
fput(exec);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3bd96c3d4cd0..a5205149adba 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -756,6 +756,9 @@ retry:
return VM_FAULT_SIGBUS;
}
} else {
+ result = filemap_fsnotify_fault(vmf);
+ if (unlikely(result))
+ return result;
filemap_invalidate_lock_shared(mapping);
}
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fdf4817a7dbc..a50e5c31b937 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5301,6 +5301,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
/* i_version is always enabled now */
sb->s_flags |= SB_I_VERSION;
+ /* HSM events are allowed by default. */
+ sb->s_iflags |= SB_I_ALLOW_HSM;
+
err = ext4_check_feature_compatibility(sb, es, silent);
if (err)
goto failed_mount;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 49884fa3c81d..5598e4d57422 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -1158,10 +1158,10 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
+ BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
HWEIGHT32(
(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
- __FMODE_EXEC | __FMODE_NONOTIFY));
+ __FMODE_EXEC));
fasync_cache = kmem_cache_create("fasync_cache",
sizeof(struct fasync_struct), 0,
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 24c7c5df4998..95646f7c46ca 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
struct fanotify_perm_event *event,
struct fsnotify_iter_info *iter_info)
{
- int ret;
+ int ret, errno;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group,
ret = 0;
break;
case FAN_DENY:
+ /* Check custom errno from pre-content events */
+ errno = fanotify_get_response_errno(event->response);
+ if (errno) {
+ ret = -errno;
+ break;
+ }
+ fallthrough;
default:
ret = -EPERM;
}
/* Check if the response should be audited */
- if (event->response & FAN_AUDIT)
- audit_fanotify(event->response & ~FAN_AUDIT,
- &event->audit_rule);
+ if (event->response & FAN_AUDIT) {
+ u32 response = event->response &
+ (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS);
+ audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule);
+ }
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
@@ -548,9 +557,13 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}
-static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
+static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
+ int data_type,
gfp_t gfp)
{
+ const struct path *path = fsnotify_data_path(data, data_type);
+ const struct file_range *range =
+ fsnotify_data_file_range(data, data_type);
struct fanotify_perm_event *pevent;
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
@@ -564,6 +577,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
pevent->hdr.len = 0;
pevent->state = FAN_EVENT_INIT;
pevent->path = *path;
+ /* NULL ppos means no range info */
+ pevent->ppos = range ? &range->pos : NULL;
+ pevent->count = range ? range->count : 0;
path_get(path);
return &pevent->fae;
@@ -801,7 +817,7 @@ static struct fanotify_event *fanotify_alloc_event(
old_memcg = set_active_memcg(group->memcg);
if (fanotify_is_perm_event(mask)) {
- event = fanotify_alloc_perm_event(path, gfp);
+ event = fanotify_alloc_perm_event(data, data_type, gfp);
} else if (fanotify_is_error_event(mask)) {
event = fanotify_alloc_error_event(group, fsid, data,
data_type, &hash);
@@ -909,8 +925,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
+ BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
- BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index e5ab33cae6a7..c12cbc270539 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -425,6 +425,8 @@ FANOTIFY_PE(struct fanotify_event *event)
struct fanotify_perm_event {
struct fanotify_event fae;
struct path path;
+ const loff_t *ppos; /* optional file range info */
+ size_t count;
u32 response; /* userspace answer to the event */
unsigned short state; /* state of the event */
int fd; /* fd we passed to userspace for this event */
@@ -446,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask)
mask & FANOTIFY_PERM_EVENTS;
}
+static inline bool fanotify_event_has_access_range(struct fanotify_event *event)
+{
+ if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS))
+ return false;
+
+ return FANOTIFY_PERM(event)->ppos;
+}
+
static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
{
return container_of(fse, struct fanotify_event, fse);
@@ -518,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
return mflags;
}
+
+static inline u32 fanotify_get_response_errno(int res)
+{
+ return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 2d85c71717d6..6ff94e312232 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void)
*
* Internal and external open flags are stored together in field f_flags of
* struct file. Only external open flags shall be allowed in event_f_flags.
- * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
- * excluded.
+ * Internal flags like FMODE_EXEC shall be excluded.
*/
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
O_ACCMODE | O_APPEND | O_NONBLOCK | \
@@ -118,10 +117,12 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
#define FANOTIFY_EVENT_ALIGN 4
#define FANOTIFY_FID_INFO_HDR_LEN \
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
-#define FANOTIFY_PIDFD_INFO_HDR_LEN \
+#define FANOTIFY_PIDFD_INFO_LEN \
sizeof(struct fanotify_event_info_pidfd)
#define FANOTIFY_ERROR_INFO_LEN \
(sizeof(struct fanotify_event_info_error))
+#define FANOTIFY_RANGE_INFO_LEN \
+ (sizeof(struct fanotify_event_info_range))
static int fanotify_fid_info_len(int fh_len, int name_len)
{
@@ -159,9 +160,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
int fh_len;
int dot_len = 0;
- if (!info_mode)
- return event_len;
-
if (fanotify_is_error_event(event->mask))
event_len += FANOTIFY_ERROR_INFO_LEN;
@@ -176,14 +174,17 @@ static size_t fanotify_event_len(unsigned int info_mode,
dot_len = 1;
}
- if (info_mode & FAN_REPORT_PIDFD)
- event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
-
if (fanotify_event_has_object_fh(event)) {
fh_len = fanotify_event_object_fh_len(event);
event_len += fanotify_fid_info_len(fh_len, dot_len);
}
+ if (info_mode & FAN_REPORT_PIDFD)
+ event_len += FANOTIFY_PIDFD_INFO_LEN;
+
+ if (fanotify_event_has_access_range(event))
+ event_len += FANOTIFY_RANGE_INFO_LEN;
+
return event_len;
}
@@ -258,12 +259,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
return client_fd;
/*
- * we need a new file handle for the userspace program so it can read even if it was
- * originally opened O_WRONLY.
+ * We provide an fd for the userspace program, so it could access the
+ * file without generating fanotify events itself.
*/
- new_file = dentry_open(path,
- group->fanotify_data.f_flags | __FMODE_NONOTIFY,
- current_cred());
+ new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags,
+ current_cred());
if (IS_ERR(new_file)) {
put_unused_fd(client_fd);
client_fd = PTR_ERR(new_file);
@@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group,
struct fanotify_perm_event *event;
int fd = response_struct->fd;
u32 response = response_struct->response;
+ int errno = fanotify_get_response_errno(response);
int ret = info_len;
struct fanotify_response_info_audit_rule friar;
- pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
- group, fd, response, info, info_len);
+ pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n",
+ __func__, group, fd, response, errno, info, info_len);
/*
* make sure the response is valid, if invalid we do nothing and either
* userspace can send a valid response or we will clean it up after the
@@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group,
switch (response & FANOTIFY_RESPONSE_ACCESS) {
case FAN_ALLOW:
+ if (errno)
+ return -EINVAL;
+ break;
case FAN_DENY:
+ /* Custom errno is supported only for pre-content groups */
+ if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT)
+ return -EINVAL;
+
+ /*
+ * Limit errno to values expected on open(2)/read(2)/write(2)
+ * of regular files.
+ */
+ switch (errno) {
+ case 0:
+ case EIO:
+ case EPERM:
+ case EBUSY:
+ case ETXTBSY:
+ case EAGAIN:
+ case ENOSPC:
+ case EDQUOT:
+ break;
+ default:
+ return -EINVAL;
+ }
break;
default:
return -EINVAL;
@@ -506,7 +531,7 @@ static int copy_pidfd_info_to_user(int pidfd,
size_t count)
{
struct fanotify_event_info_pidfd info = { };
- size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
+ size_t info_len = FANOTIFY_PIDFD_INFO_LEN;
if (WARN_ON_ONCE(info_len > count))
return -EFAULT;
@@ -521,6 +546,30 @@ static int copy_pidfd_info_to_user(int pidfd,
return info_len;
}
+static size_t copy_range_info_to_user(struct fanotify_event *event,
+ char __user *buf, int count)
+{
+ struct fanotify_perm_event *pevent = FANOTIFY_PERM(event);
+ struct fanotify_event_info_range info = { };
+ size_t info_len = FANOTIFY_RANGE_INFO_LEN;
+
+ if (WARN_ON_ONCE(info_len > count))
+ return -EFAULT;
+
+ if (WARN_ON_ONCE(!pevent->ppos))
+ return -EINVAL;
+
+ info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE;
+ info.hdr.len = info_len;
+ info.offset = *(pevent->ppos);
+ info.count = pevent->count;
+
+ if (copy_to_user(buf, &info, info_len))
+ return -EFAULT;
+
+ return info_len;
+}
+
static int copy_info_records_to_user(struct fanotify_event *event,
struct fanotify_info *info,
unsigned int info_mode, int pidfd,
@@ -642,6 +691,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
total_bytes += ret;
}
+ if (fanotify_event_has_access_range(event)) {
+ ret = copy_range_info_to_user(event, buf, count);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ count -= ret;
+ total_bytes += ret;
+ }
+
return total_bytes;
}
@@ -756,12 +814,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
buf += FAN_EVENT_METADATA_LEN;
count -= FAN_EVENT_METADATA_LEN;
- if (info_mode) {
- ret = copy_info_records_to_user(event, info, info_mode, pidfd,
- buf, count);
- if (ret < 0)
- goto out_close_fd;
- }
+ ret = copy_info_records_to_user(event, info, info_mode, pidfd,
+ buf, count);
+ if (ret < 0)
+ goto out_close_fd;
if (f)
fd_install(fd, f);
@@ -1294,7 +1350,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
}
static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
- unsigned int fan_flags)
+ __u32 mask, unsigned int fan_flags)
{
/*
* Non evictable mark cannot be downgraded to evictable mark.
@@ -1321,6 +1377,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
return -EEXIST;
+ /* For now pre-content events are not generated for directories */
+ mask |= fsn_mark->mask;
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+ return -EEXIST;
+
return 0;
}
@@ -1347,7 +1408,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
/*
* Check if requested mark flags conflict with an existing mark flags.
*/
- ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
+ ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
if (ret)
goto out;
@@ -1409,6 +1470,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
unsigned int class = flags & FANOTIFY_CLASS_BITS;
unsigned int internal_flags = 0;
+ struct file *file;
pr_debug("%s: flags=%x event_f_flags=%x\n",
__func__, flags, event_f_flags);
@@ -1477,7 +1539,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
(!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
return -EINVAL;
- f_flags = O_RDWR | __FMODE_NONOTIFY;
+ f_flags = O_RDWR;
if (flags & FAN_CLOEXEC)
f_flags |= O_CLOEXEC;
if (flags & FAN_NONBLOCK)
@@ -1555,10 +1617,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
goto out_destroy_group;
}
- fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
+ fd = get_unused_fd_flags(f_flags);
if (fd < 0)
goto out_destroy_group;
+ file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group,
+ f_flags, FMODE_NONOTIFY);
+ if (IS_ERR(file)) {
+ put_unused_fd(fd);
+ fd = PTR_ERR(file);
+ goto out_destroy_group;
+ }
+ fd_install(fd, file);
return fd;
out_destroy_group:
@@ -1638,12 +1708,24 @@ static int fanotify_events_supported(struct fsnotify_group *group,
unsigned int flags)
{
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ bool is_dir = d_is_dir(path->dentry);
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
(mask & FAN_RENAME) ||
(flags & FAN_MARK_IGNORE);
/*
+ * Filesystems need to opt-into pre-content evnets (a.k.a HSM)
+ * and they are only supported on regular files and directories.
+ */
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
+ if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
+ return -EOPNOTSUPP;
+ if (!is_dir && !d_is_reg(path->dentry))
+ return -EINVAL;
+ }
+
+ /*
* Some filesystems such as 'proc' acquire unusual locks when opening
* files. For them fanotify permission events have high chances of
* deadlocking the system - open done when reporting fanotify event
@@ -1675,7 +1757,7 @@ static int fanotify_events_supported(struct fsnotify_group *group,
* but because we always allowed it, error only when using new APIs.
*/
if (strict_dir_events && mark_type == FAN_MARK_INODE &&
- !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+ !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
return -ENOTDIR;
return 0;
@@ -1776,10 +1858,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
return -EPERM;
/*
- * Permission events require minimum priority FAN_CLASS_CONTENT.
+ * Permission events are not allowed for FAN_CLASS_NOTIF.
+ * Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
*/
if (mask & FANOTIFY_PERM_EVENTS &&
- group->priority < FSNOTIFY_PRIO_CONTENT)
+ group->priority == FSNOTIFY_PRIO_NORMAL)
+ return -EINVAL;
+ else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
+ group->priority == FSNOTIFY_PRIO_CONTENT)
return -EINVAL;
if (mask & FAN_FS_ERROR &&
@@ -1814,6 +1900,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
return -EINVAL;
+ /* Pre-content events are not currently generated for directories. */
+ if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
+ return -EINVAL;
+
if (mark_cmd == FAN_MARK_FLUSH) {
if (mark_type == FAN_MARK_MOUNT)
fsnotify_clear_vfsmount_marks_by_group(group);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f976949d2634..8ee495a58d0a 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -193,7 +193,7 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
return mask & marks_mask;
}
-/* Are there any inode/mount/sb objects that are interested in this event? */
+/* Are there any inode/mount/sb objects that watch for these events? */
static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
__u32 mask)
{
@@ -203,6 +203,24 @@ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
return mask & marks_mask & ALL_FSNOTIFY_EVENTS;
}
+/* Report pre-content event with optional range info */
+int fsnotify_pre_content(const struct path *path, const loff_t *ppos,
+ size_t count)
+{
+ struct file_range range;
+
+ /* Report page aligned range only when pos is known */
+ if (!ppos)
+ return fsnotify_path(path, FS_PRE_ACCESS);
+
+ range.path = path;
+ range.pos = PAGE_ALIGN_DOWN(*ppos);
+ range.count = PAGE_ALIGN(*ppos + count) - range.pos;
+
+ return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range,
+ FSNOTIFY_EVENT_FILE_RANGE);
+}
+
/*
* Notify this dentry's parent about a child's events with child name info
* if parent is watching or if inode/sb/mount are interested in events with
@@ -623,11 +641,72 @@ out:
}
EXPORT_SYMBOL_GPL(fsnotify);
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+/*
+ * At open time we check fsnotify_sb_has_priority_watchers() and set the
+ * FMODE_NONOTIFY_ mode bits accordignly.
+ * Later, fsnotify permission hooks do not check if there are permission event
+ * watches, but that there were permission event watches at open time.
+ */
+void file_set_fsnotify_mode(struct file *file)
+{
+ struct dentry *dentry = file->f_path.dentry, *parent;
+ struct super_block *sb = dentry->d_sb;
+ __u32 mnt_mask, p_mask;
+
+ /* Is it a file opened by fanotify? */
+ if (FMODE_FSNOTIFY_NONE(file->f_mode))
+ return;
+
+ /*
+ * Permission events is a super set of pre-content events, so if there
+ * are no permission event watchers, there are also no pre-content event
+ * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit.
+ */
+ if (likely(!fsnotify_sb_has_priority_watchers(sb,
+ FSNOTIFY_PRIO_CONTENT))) {
+ file->f_mode |= FMODE_NONOTIFY_PERM;
+ return;
+ }
+
+ /*
+ * If there are permission event watchers but no pre-content event
+ * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
+ */
+ if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
+ likely(!fsnotify_sb_has_priority_watchers(sb,
+ FSNOTIFY_PRIO_PRE_CONTENT))) {
+ file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+ return;
+ }
+
+ /*
+ * OK, there are some pre-content watchers. Check if anybody is
+ * watching for pre-content events on *this* file.
+ */
+ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
+ if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
+ FSNOTIFY_PRE_CONTENT_EVENTS)))
+ return;
+
+ /* Is parent watching for pre-content events on this file? */
+ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
+ parent = dget_parent(dentry);
+ p_mask = fsnotify_inode_watches_children(d_inode(parent));
+ dput(parent);
+ if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)
+ return;
+ }
+ /* Nobody watching for pre-content events from this file */
+ file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
+}
+#endif
+
static __init int fsnotify_init(void)
{
int ret;
- BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
+ BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24);
ret = init_srcu_struct(&fsnotify_mark_srcu);
if (ret)
diff --git a/fs/open.c b/fs/open.c
index ffcfef67ac86..0a5d2f6061c6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -81,14 +81,18 @@ long vfs_truncate(const struct path *path, loff_t length)
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- error = mnt_want_write(path->mnt);
- if (error)
- goto out;
-
idmap = mnt_idmap(path->mnt);
error = inode_permission(idmap, inode, MAY_WRITE);
if (error)
- goto mnt_drop_write_and_out;
+ return error;
+
+ error = fsnotify_truncate_perm(path, length);
+ if (error)
+ return error;
+
+ error = mnt_want_write(path->mnt);
+ if (error)
+ return error;
error = -EPERM;
if (IS_APPEND(inode))
@@ -114,7 +118,7 @@ put_write_and_out:
put_write_access(inode);
mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
-out:
+
return error;
}
EXPORT_SYMBOL_GPL(vfs_truncate);
@@ -175,11 +179,18 @@ long do_ftruncate(struct file *file, loff_t length, int small)
/* Check IS_APPEND on real upper inode */
if (IS_APPEND(file_inode(file)))
return -EPERM;
- sb_start_write(inode->i_sb);
+
error = security_file_truncate(file);
- if (!error)
- error = do_truncate(file_mnt_idmap(file), dentry, length,
- ATTR_MTIME | ATTR_CTIME, file);
+ if (error)
+ return error;
+
+ error = fsnotify_truncate_perm(&file->f_path, length);
+ if (error)
+ return error;
+
+ sb_start_write(inode->i_sb);
+ error = do_truncate(file_mnt_idmap(file), dentry, length,
+ ATTR_MTIME | ATTR_CTIME, file);
sb_end_write(inode->i_sb);
return error;
@@ -894,7 +905,7 @@ static int do_dentry_open(struct file *f,
f->f_sb_err = file_sample_sb_err(f);
if (unlikely(f->f_flags & O_PATH)) {
- f->f_mode = FMODE_PATH | FMODE_OPENED;
+ f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY;
f->f_op = &empty_fops;
return 0;
}
@@ -922,6 +933,12 @@ static int do_dentry_open(struct file *f,
if (error)
goto cleanup_all;
+ /*
+ * Set FMODE_NONOTIFY_* bits according to existing permission watches.
+ * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
+ * change anything.
+ */
+ file_set_fsnotify_mode(f);
error = fsnotify_open_perm(f);
if (error)
goto cleanup_all;
@@ -1098,6 +1115,23 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
+struct file *dentry_open_nonotify(const struct path *path, int flags,
+ const struct cred *cred)
+{
+ struct file *f = alloc_empty_file(flags, cred);
+ if (!IS_ERR(f)) {
+ int error;
+
+ f->f_mode |= FMODE_NONOTIFY;
+ error = vfs_open(path, f);
+ if (error) {
+ fput(f);
+ f = ERR_PTR(error);
+ }
+ }
+ return f;
+}
+
/**
* dentry_create - Create and open a file
* @path: path to create
@@ -1195,7 +1229,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
inline int build_open_flags(const struct open_how *how, struct open_flags *op)
{
u64 flags = how->flags;
- u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
+ u64 strip = O_CLOEXEC;
int lookup_flags = 0;
int acc_mode = ACC_MODE(flags);
@@ -1203,9 +1237,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
"struct open_flags doesn't yet handle flags > 32 bits");
/*
- * Strip flags that either shouldn't be set by userspace like
- * FMODE_NONOTIFY or that aren't relevant in deter