diff options
-rw-r--r-- | fs/binfmt_elf.c | 4 | ||||
-rw-r--r-- | fs/binfmt_elf_fdpic.c | 4 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 9 | ||||
-rw-r--r-- | fs/btrfs/super.c | 2 | ||||
-rw-r--r-- | fs/exec.c | 8 | ||||
-rw-r--r-- | fs/ext4/file.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 3 | ||||
-rw-r--r-- | fs/fcntl.c | 4 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.c | 31 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify.h | 15 | ||||
-rw-r--r-- | fs/notify/fanotify/fanotify_user.c | 150 | ||||
-rw-r--r-- | fs/notify/fsnotify.c | 83 | ||||
-rw-r--r-- | fs/open.c | 62 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 2 | ||||
-rw-r--r-- | include/linux/fanotify.h | 18 | ||||
-rw-r--r-- | include/linux/fs.h | 72 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 78 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 53 | ||||
-rw-r--r-- | include/linux/mm.h | 1 | ||||
-rw-r--r-- | include/uapi/asm-generic/fcntl.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/fanotify.h | 18 | ||||
-rw-r--r-- | kernel/fork.c | 12 | ||||
-rw-r--r-- | mm/filemap.c | 86 | ||||
-rw-r--r-- | mm/memory.c | 19 | ||||
-rw-r--r-- | mm/nommu.c | 7 | ||||
-rw-r--r-- | mm/readahead.c | 14 | ||||
-rw-r--r-- | security/selinux/hooks.c | 3 |
28 files changed, 669 insertions, 106 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 106f0e8af177..8054f44d39cf 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1257,7 +1257,7 @@ out_free_interp: } reloc_func_desc = interp_load_addr; - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); kfree(interp_elf_ex); @@ -1354,7 +1354,7 @@ out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); out_free_file: - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); if (interpreter) fput(interpreter); out_free_ph: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f1a7c4875c4a..c13ee8180b17 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -394,7 +394,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) goto error; } - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); interpreter = NULL; } @@ -467,7 +467,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) error: if (interpreter) { - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); } kfree(interpreter_name); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ae98269a5e3a..6c18bad53cd3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2544,6 +2544,15 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) goto out; } + /* + * Don't allow defrag on pre-content watched files, as it could + * populate the page cache with 0's via readahead. + */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + ret = -EINVAL; + goto out; + } + if (argp) { if (copy_from_user(&range, argp, sizeof(range))) { ret = -EFAULT; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f809c3200c21..dc4fee519ca6 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -961,7 +961,7 @@ static int btrfs_fill_super(struct super_block *sb, #endif sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; - sb->s_iflags |= SB_I_CGROUPWB; + sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; err = super_setup_bdi(sb); if (err) { diff --git a/fs/exec.c b/fs/exec.c index d58b061c5e42..18f25c23b09f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -913,7 +913,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) path_noexec(&file->f_path)) return ERR_PTR(-EACCES); - err = deny_write_access(file); + err = exe_file_deny_write_access(file); if (err) return ERR_PTR(err); @@ -928,7 +928,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) * Returns ERR_PTR on failure or allocated struct file on success. * * As this is a wrapper for the internal do_open_execat(), callers - * must call allow_write_access() before fput() on release. Also see + * must call exe_file_allow_write_access() before fput() on release. Also see * do_close_execat(). */ struct file *open_exec(const char *name) @@ -1493,7 +1493,7 @@ static void do_close_execat(struct file *file) { if (!file) return; - allow_write_access(file); + exe_file_allow_write_access(file); fput(file); } @@ -1822,7 +1822,7 @@ static int exec_binprm(struct linux_binprm *bprm) bprm->file = bprm->interpreter; bprm->interpreter = NULL; - allow_write_access(exec); + exe_file_allow_write_access(exec); if (unlikely(bprm->have_execfd)) { if (bprm->executable) { fput(exec); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 3bd96c3d4cd0..a5205149adba 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -756,6 +756,9 @@ retry: return VM_FAULT_SIGBUS; } } else { + result = filemap_fsnotify_fault(vmf); + if (unlikely(result)) + return result; filemap_invalidate_lock_shared(mapping); } result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fdf4817a7dbc..a50e5c31b937 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5301,6 +5301,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) /* i_version is always enabled now */ sb->s_flags |= SB_I_VERSION; + /* HSM events are allowed by default. */ + sb->s_iflags |= SB_I_ALLOW_HSM; + err = ext4_check_feature_compatibility(sb, es, silent); if (err) goto failed_mount; diff --git a/fs/fcntl.c b/fs/fcntl.c index 49884fa3c81d..5598e4d57422 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1158,10 +1158,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | - __FMODE_EXEC | __FMODE_NONOTIFY)); + __FMODE_EXEC)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 24c7c5df4998..95646f7c46ca 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { - int ret; + int ret, errno; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group, ret = 0; break; case FAN_DENY: + /* Check custom errno from pre-content events */ + errno = fanotify_get_response_errno(event->response); + if (errno) { + ret = -errno; + break; + } + fallthrough; default: ret = -EPERM; } /* Check if the response should be audited */ - if (event->response & FAN_AUDIT) - audit_fanotify(event->response & ~FAN_AUDIT, - &event->audit_rule); + if (event->response & FAN_AUDIT) { + u32 response = event->response & + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS); + audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule); + } pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); @@ -548,9 +557,13 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } -static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, +static struct fanotify_event *fanotify_alloc_perm_event(const void *data, + int data_type, gfp_t gfp) { + const struct path *path = fsnotify_data_path(data, data_type); + const struct file_range *range = + fsnotify_data_file_range(data, data_type); struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); @@ -564,6 +577,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, pevent->hdr.len = 0; pevent->state = FAN_EVENT_INIT; pevent->path = *path; + /* NULL ppos means no range info */ + pevent->ppos = range ? &range->pos : NULL; + pevent->count = range ? range->count : 0; path_get(path); return &pevent->fae; @@ -801,7 +817,7 @@ static struct fanotify_event *fanotify_alloc_event( old_memcg = set_active_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - event = fanotify_alloc_perm_event(path, gfp); + event = fanotify_alloc_perm_event(data, data_type, gfp); } else if (fanotify_is_error_event(mask)) { event = fanotify_alloc_error_event(group, fsid, data, data_type, &hash); @@ -909,8 +925,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); BUILD_BUG_ON(FAN_RENAME != FS_RENAME); + BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index e5ab33cae6a7..c12cbc270539 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -425,6 +425,8 @@ FANOTIFY_PE(struct fanotify_event *event) struct fanotify_perm_event { struct fanotify_event fae; struct path path; + const loff_t *ppos; /* optional file range info */ + size_t count; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ @@ -446,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } +static inline bool fanotify_event_has_access_range(struct fanotify_event *event) +{ + if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS)) + return false; + + return FANOTIFY_PERM(event)->ppos; +} + static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_event, fse); @@ -518,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) return mflags; } + +static inline u32 fanotify_get_response_errno(int res) +{ + return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2d85c71717d6..6ff94e312232 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void) * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be - * excluded. + * Internal flags like FMODE_EXEC shall be excluded. */ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ @@ -118,10 +117,12 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle)) -#define FANOTIFY_PIDFD_INFO_HDR_LEN \ +#define FANOTIFY_PIDFD_INFO_LEN \ sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) +#define FANOTIFY_RANGE_INFO_LEN \ + (sizeof(struct fanotify_event_info_range)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -159,9 +160,6 @@ static size_t fanotify_event_len(unsigned int info_mode, int fh_len; int dot_len = 0; - if (!info_mode) - return event_len; - if (fanotify_is_error_event(event->mask)) event_len += FANOTIFY_ERROR_INFO_LEN; @@ -176,14 +174,17 @@ static size_t fanotify_event_len(unsigned int info_mode, dot_len = 1; } - if (info_mode & FAN_REPORT_PIDFD) - event_len += FANOTIFY_PIDFD_INFO_HDR_LEN; - if (fanotify_event_has_object_fh(event)) { fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } + if (info_mode & FAN_REPORT_PIDFD) + event_len += FANOTIFY_PIDFD_INFO_LEN; + + if (fanotify_event_has_access_range(event)) + event_len += FANOTIFY_RANGE_INFO_LEN; + return event_len; } @@ -258,12 +259,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, return client_fd; /* - * we need a new file handle for the userspace program so it can read even if it was - * originally opened O_WRONLY. + * We provide an fd for the userspace program, so it could access the + * file without generating fanotify events itself. */ - new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, - current_cred()); + new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags, + current_cred()); if (IS_ERR(new_file)) { put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); @@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group, struct fanotify_perm_event *event; int fd = response_struct->fd; u32 response = response_struct->response; + int errno = fanotify_get_response_errno(response); int ret = info_len; struct fanotify_response_info_audit_rule friar; - pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__, - group, fd, response, info, info_len); + pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", + __func__, group, fd, response, errno, info, info_len); /* * make sure the response is valid, if invalid we do nothing and either * userspace can send a valid response or we will clean it up after the @@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group, switch (response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW: + if (errno) + return -EINVAL; + break; case FAN_DENY: + /* Custom errno is supported only for pre-content groups */ + if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) + return -EINVAL; + + /* + * Limit errno to values expected on open(2)/read(2)/write(2) + * of regular files. + */ + switch (errno) { + case 0: + case EIO: + case EPERM: + case EBUSY: + case ETXTBSY: + case EAGAIN: + case ENOSPC: + case EDQUOT: + break; + default: + return -EINVAL; + } break; default: return -EINVAL; @@ -506,7 +531,7 @@ static int copy_pidfd_info_to_user(int pidfd, size_t count) { struct fanotify_event_info_pidfd info = { }; - size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN; + size_t info_len = FANOTIFY_PIDFD_INFO_LEN; if (WARN_ON_ONCE(info_len > count)) return -EFAULT; @@ -521,6 +546,30 @@ static int copy_pidfd_info_to_user(int pidfd, return info_len; } +static size_t copy_range_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); + struct fanotify_event_info_range info = { }; + size_t info_len = FANOTIFY_RANGE_INFO_LEN; + + if (WARN_ON_ONCE(info_len > count)) + return -EFAULT; + + if (WARN_ON_ONCE(!pevent->ppos)) + return -EINVAL; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; + info.hdr.len = info_len; + info.offset = *(pevent->ppos); + info.count = pevent->count; + + if (copy_to_user(buf, &info, info_len)) + return -EFAULT; + + return info_len; +} + static int copy_info_records_to_user(struct fanotify_event *event, struct fanotify_info *info, unsigned int info_mode, int pidfd, @@ -642,6 +691,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_event_has_access_range(event)) { + ret = copy_range_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -756,12 +814,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, buf += FAN_EVENT_METADATA_LEN; count -= FAN_EVENT_METADATA_LEN; - if (info_mode) { - ret = copy_info_records_to_user(event, info, info_mode, pidfd, - buf, count); - if (ret < 0) - goto out_close_fd; - } + ret = copy_info_records_to_user(event, info, info_mode, pidfd, + buf, count); + if (ret < 0) + goto out_close_fd; if (f) fd_install(fd, f); @@ -1294,7 +1350,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) } static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) + __u32 mask, unsigned int fan_flags) { /* * Non evictable mark cannot be downgraded to evictable mark. @@ -1321,6 +1377,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) return -EEXIST; + /* For now pre-content events are not generated for directories */ + mask |= fsn_mark->mask; + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EEXIST; + return 0; } @@ -1347,7 +1408,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, /* * Check if requested mark flags conflict with an existing mark flags. */ - ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); + ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); if (ret) goto out; @@ -1409,6 +1470,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; + struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1477,7 +1539,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -1555,10 +1617,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + fd = get_unused_fd_flags(f_flags); if (fd < 0) goto out_destroy_group; + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, + f_flags, FMODE_NONOTIFY); + if (IS_ERR(file)) { + put_unused_fd(fd); + fd = PTR_ERR(file); + goto out_destroy_group; + } + fd_install(fd, file); return fd; out_destroy_group: @@ -1638,12 +1708,24 @@ static int fanotify_events_supported(struct fsnotify_group *group, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + bool is_dir = d_is_dir(path->dentry); /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || (mask & FAN_RENAME) || (flags & FAN_MARK_IGNORE); /* + * Filesystems need to opt-into pre-content evnets (a.k.a HSM) + * and they are only supported on regular files and directories. + */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { + if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) + return -EOPNOTSUPP; + if (!is_dir && !d_is_reg(path->dentry)) + return -EINVAL; + } + + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of * deadlocking the system - open done when reporting fanotify event @@ -1675,7 +1757,7 @@ static int fanotify_events_supported(struct fsnotify_group *group, * but because we always allowed it, error only when using new APIs. */ if (strict_dir_events && mark_type == FAN_MARK_INODE && - !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) return -ENOTDIR; return 0; @@ -1776,10 +1858,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EPERM; /* - * Permission events require minimum priority FAN_CLASS_CONTENT. + * Permission events are not allowed for FAN_CLASS_NOTIF. + * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. */ if (mask & FANOTIFY_PERM_EVENTS && - group->priority < FSNOTIFY_PRIO_CONTENT) + group->priority == FSNOTIFY_PRIO_NORMAL) + return -EINVAL; + else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && + group->priority == FSNOTIFY_PRIO_CONTENT) return -EINVAL; if (mask & FAN_FS_ERROR && @@ -1814,6 +1900,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) return -EINVAL; + /* Pre-content events are not currently generated for directories. */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EINVAL; + if (mark_cmd == FAN_MARK_FLUSH) { if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index f976949d2634..8ee495a58d0a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -193,7 +193,7 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, return mask & marks_mask; } -/* Are there any inode/mount/sb objects that are interested in this event? */ +/* Are there any inode/mount/sb objects that watch for these events? */ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { @@ -203,6 +203,24 @@ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } +/* Report pre-content event with optional range info */ +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count) +{ + struct file_range range; + + /* Report page aligned range only when pos is known */ + if (!ppos) + return fsnotify_path(path, FS_PRE_ACCESS); + + range.path = path; + range.pos = PAGE_ALIGN_DOWN(*ppos); + range.count = PAGE_ALIGN(*ppos + count) - range.pos; + + return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range, + FSNOTIFY_EVENT_FILE_RANGE); +} + /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with @@ -623,11 +641,72 @@ out: } EXPORT_SYMBOL_GPL(fsnotify); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * At open time we check fsnotify_sb_has_priority_watchers() and set the + * FMODE_NONOTIFY_ mode bits accordignly. + * Later, fsnotify permission hooks do not check if there are permission event + * watches, but that there were permission event watches at open time. + */ +void file_set_fsnotify_mode(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry, *parent; + struct super_block *sb = dentry->d_sb; + __u32 mnt_mask, p_mask; + + /* Is it a file opened by fanotify? */ + if (FMODE_FSNOTIFY_NONE(file->f_mode)) + return; + + /* + * Permission events is a super set of pre-content events, so if there + * are no permission event watchers, there are also no pre-content event + * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_CONTENT))) { + file->f_mode |= FMODE_NONOTIFY_PERM; + return; + } + + /* + * If there are permission event watchers but no pre-content event + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + */ + if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || + likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT))) { + file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + return; + } + + /* + * OK, there are some pre-content watchers. Check if anybody is + * watching for pre-content events on *this* file. + */ + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, + FSNOTIFY_PRE_CONTENT_EVENTS))) + return; + + /* Is parent watching for pre-content events on this file? */ + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask = fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) + return; + } + /* Nobody watching for pre-content events from this file */ + file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; +} +#endif + static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/open.c b/fs/open.c index ffcfef67ac86..0a5d2f6061c6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -81,14 +81,18 @@ long vfs_truncate(const struct path *path, loff_t length) if (!S_ISREG(inode->i_mode)) return -EINVAL; - error = mnt_want_write(path->mnt); - if (error) - goto out; - idmap = mnt_idmap(path->mnt); error = inode_permission(idmap, inode, MAY_WRITE); if (error) - goto mnt_drop_write_and_out; + return error; + + error = fsnotify_truncate_perm(path, length); + if (error) + return error; + + error = mnt_want_write(path->mnt); + if (error) + return error; error = -EPERM; if (IS_APPEND(inode)) @@ -114,7 +118,7 @@ put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); -out: + return error; } EXPORT_SYMBOL_GPL(vfs_truncate); @@ -175,11 +179,18 @@ long do_ftruncate(struct file *file, loff_t length, int small) /* Check IS_APPEND on real upper inode */ if (IS_APPEND(file_inode(file))) return -EPERM; - sb_start_write(inode->i_sb); + error = security_file_truncate(file); - if (!error) - error = do_truncate(file_mnt_idmap(file), dentry, length, - ATTR_MTIME | ATTR_CTIME, file); + if (error) + return error; + + error = fsnotify_truncate_perm(&file->f_path, length); + if (error) + return error; + + sb_start_write(inode->i_sb); + error = do_truncate(file_mnt_idmap(file), dentry, length, + ATTR_MTIME | ATTR_CTIME, file); sb_end_write(inode->i_sb); return error; @@ -894,7 +905,7 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED; + f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; f->f_op = &empty_fops; return 0; } @@ -922,6 +933,12 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* + * Set FMODE_NONOTIFY_* bits according to existing permission watches. + * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't + * change anything. + */ + file_set_fsnotify_mode(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; @@ -1098,6 +1115,23 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred) +{ + struct file *f = alloc_empty_file(flags, cred); + if (!IS_ERR(f)) { + int error; + + f->f_mode |= FMODE_NONOTIFY; + error = vfs_open(path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } + return f; +} + /** * dentry_create - Create and open a file * @path: path to create @@ -1195,7 +1229,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); @@ -1203,9 +1237,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) "struct open_flags doesn't yet handle flags > 32 bits"); /* - * Strip flags that either shouldn't be set by userspace like - * FMODE_NONOTIFY or that aren't relevant in deter |