diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 09:13:50 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 09:13:50 -0700 |
| commit | 99c21beaab2db53d1ba17102b7cedc7a584dfe23 (patch) | |
| tree | d10418e8a7a1504ad5626d95e28cfcc507375dd2 | |
| parent | c4cff1ea37ac5684efc55d3e14ea8350893b3f4d (diff) | |
| parent | 4dec4f91359c456a5eea26817ea151b42953432e (diff) | |
| download | linux-99c21beaab2db53d1ba17102b7cedc7a584dfe23.tar.gz linux-99c21beaab2db53d1ba17102b7cedc7a584dfe23.tar.bz2 linux-99c21beaab2db53d1ba17102b7cedc7a584dfe23.zip | |
Merge tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner:
"Features:
- Add CONFIG_DEBUG_VFS infrastructure:
- Catch invalid modes in open
- Use the new debug macros in inode_set_cached_link()
- Use debug-only asserts around fd allocation and install
- Place f_ref to 3rd cache line in struct file to resolve false
sharing
Cleanups:
- Start using anon_inode_getfile_fmode() helper in various places
- Don't take f_lock during SEEK_CUR if exclusion is guaranteed by
f_pos_lock
- Add unlikely() to kcmp()
- Remove legacy ->remount_fs method from ecryptfs after port to the
new mount api
- Remove invalidate_inodes() in favour of evict_inodes()
- Simplify ep_busy_loop() by removing unused argument
- Avoid mmap sem relocks when coredumping with many missing pages
- Inline getname()
- Inline new_inode_pseudo() and de-staticize alloc_inode()
- Dodge an atomic in putname if ref == 1
- Consistently deref the files table with rcu_dereference_raw()
- Dedup handling of struct filename init and refcounts bumps
- Use wq_has_sleeper() in end_dir_add()
- Drop the lock trip around I_NEW wake up in evict()
- Load the ->i_sb pointer once in inode_sb_list_{add,del}
- Predict not reaching the limit in alloc_empty_file()
- Tidy up do_sys_openat2() with likely/unlikely
- Call inode_sb_list_add() outside of inode hash lock
- Sort out fd allocation vs dup2 race commentary
- Turn page_offset() into a wrapper around folio_pos()
- Remove locking in exportfs around ->get_parent() call
- try_lookup_one_len() does not need any locks in autofs
- Fix return type of several functions from long to int in open
- Fix return type of several functions from long to int in ioctls
Fixes:
- Fix watch queue accounting mismatch"
* tag 'vfs-6.15-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (30 commits)
fs: sort out fd allocation vs dup2 race commentary, take 2
fs: call inode_sb_list_add() outside of inode hash lock
fs: tidy up do_sys_openat2() with likely/unlikely
fs: predict not reaching the limit in alloc_empty_file()
fs: load the ->i_sb pointer once in inode_sb_list_{add,del}
fs: drop the lock trip around I_NEW wake up in evict()
fs: use wq_has_sleeper() in end_dir_add()
VFS/autofs: try_lookup_one_len() does not need any locks
fs: dedup handling of struct filename init and refcounts bumps
fs: consistently deref the files table with rcu_dereference_raw()
exportfs: remove locking around ->get_parent() call.
fs: use debug-only asserts around fd allocation and install
fs: dodge an atomic in putname if ref == 1
vfs: Remove invalidate_inodes()
ecryptfs: remove NULL remount_fs from super_operations
watch_queue: fix pipe accounting mismatch
fs: place f_ref to 3rd cache line in struct file to resolve false sharing
epoll: simplify ep_busy_loop by removing always 0 argument
fs: Turn page_offset() into a wrapper around folio_pos()
kcmp: improve performance adding an unlikely hint to task comparisons
...
36 files changed, 339 insertions, 258 deletions
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 1639e78e3146..12a71ba221b8 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1157,3 +1157,8 @@ in normal case it points into the pathname being looked up. NOTE: if you need something like full path from the root of filesystem, you are still on your own - this assists with simple cases, but it's not magic. + +--- + +** mandatory ** +invalidate_inodes() is gone use evict_inodes() instead. diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 2e94d20c4ac7..b735f4c2fe5e 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -27,9 +27,10 @@ static int mte_dump_tag_range(struct coredump_params *cprm, int ret = 1; unsigned long addr; void *tags = NULL; + int locked = 0; for (addr = start; addr < start + len; addr += PAGE_SIZE) { - struct page *page = get_dump_page(addr); + struct page *page = get_dump_page(addr, &locked); /* * get_dump_page() returns NULL when encountering an empty diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c index 1574176e3ffc..c86950d7105a 100644 --- a/arch/powerpc/platforms/pseries/papr-vpd.c +++ b/arch/powerpc/platforms/pseries/papr-vpd.c @@ -482,14 +482,13 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc) goto free_blob; } - file = anon_inode_getfile("[papr-vpd]", &papr_vpd_handle_ops, - (void *)blob, O_RDONLY); + file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops, + (void *)blob, O_RDONLY, + FMODE_LSEEK | FMODE_PREAD); if (IS_ERR(file)) { err = PTR_ERR(file); goto put_fd; } - - file->f_mode |= FMODE_LSEEK | FMODE_PREAD; fd_install(fd, file); return fd; put_fd: diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 49559605177e..c321d442f0da 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -266,24 +266,12 @@ static struct file 
*vfio_device_open_file(struct vfio_device *device) if (ret) goto err_free; - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - df, O_RDWR); + filep = anon_inode_getfile_fmode("[vfio-device]", &vfio_device_fops, + df, O_RDWR, FMODE_PREAD | FMODE_PWRITE); if (IS_ERR(filep)) { ret = PTR_ERR(filep); goto err_close_device; } - - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. - */ - filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); - /* * Use the pseudo fs inode on the device to link all mmaps * to the same address space, allowing us to unmap all vmas diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 6d57efbb8110..c5a6aae12d2c 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -442,7 +442,6 @@ static int autofs_dev_ioctl_timeout(struct file *fp, sbi->exp_timeout = timeout * HZ; } else { struct dentry *base = fp->f_path.dentry; - struct inode *inode = base->d_inode; int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1; struct dentry *dentry; struct autofs_info *ino; @@ -460,9 +459,7 @@ static int autofs_dev_ioctl_timeout(struct file *fp, "the parent autofs mount timeout which could " "prevent shutdown\n"); - inode_lock_shared(inode); dentry = try_lookup_one_len(param->path, base, path_len); - inode_unlock_shared(inode); if (IS_ERR_OR_NULL(dentry)) return dentry ? 
PTR_ERR(dentry) : -ENOENT; ino = autofs_dentry_ino(dentry); diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index fe3de9ad57bf..d9bc67176128 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -317,8 +317,9 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, goto err_free_id; } - anon_file->file = anon_inode_getfile("[cachefiles]", - &cachefiles_ondemand_fd_fops, object, O_WRONLY); + anon_file->file = anon_inode_getfile_fmode("[cachefiles]", + &cachefiles_ondemand_fd_fops, object, + O_WRONLY, FMODE_PWRITE | FMODE_LSEEK); if (IS_ERR(anon_file->file)) { ret = PTR_ERR(anon_file->file); goto err_put_fd; @@ -333,8 +334,6 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, goto err_put_file; } - anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; - load = (void *)req->msg.data; load->fd = anon_file->fd; object->ondemand->ondemand_id = object_id; diff --git a/fs/coredump.c b/fs/coredump.c index 4375c70144d0..d6a92cd6018e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -926,14 +926,23 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, { unsigned long addr; struct page *dump_page; + int locked, ret; dump_page = dump_page_alloc(); if (!dump_page) return 0; + ret = 0; + locked = 0; for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page; + if (!locked) { + if (mmap_read_lock_killable(current->mm)) + goto out; + locked = 1; + } + /* * To avoid having to allocate page tables for virtual address * ranges that have never been used yet, and also to make it @@ -941,21 +950,38 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, * NULL when encountering an empty page table entry that would * otherwise have been filled with the zero page. 
*/ - page = get_dump_page(addr); + page = get_dump_page(addr, &locked); if (page) { + if (locked) { + mmap_read_unlock(current->mm); + locked = 0; + } int stop = !dump_emit_page(cprm, dump_page_copy(page, dump_page)); put_page(page); - if (stop) { - dump_page_free(dump_page); - return 0; - } + if (stop) + goto out; } else { dump_skip(cprm, PAGE_SIZE); } + + if (dump_interrupted()) + goto out; + + if (!need_resched()) + continue; + if (locked) { + mmap_read_unlock(current->mm); + locked = 0; + } cond_resched(); } + ret = 1; +out: + if (locked) + mmap_read_unlock(current->mm); + dump_page_free(dump_page); - return 1; + return ret; } #endif diff --git a/fs/dcache.c b/fs/dcache.c index e3634916ffb9..3ee84f62827a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2480,7 +2480,8 @@ static inline void end_dir_add(struct inode *dir, unsigned int n, { smp_store_release(&dir->i_dir_seq, n + 2); preempt_enable_nested(); - wake_up_all(d_wait); + if (wq_has_sleeper(d_wait)) + wake_up_all(d_wait); } static void d_wait_lookup(struct dentry *dentry) diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 0b1c878317ab..e7b7f426fecf 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -172,7 +172,6 @@ const struct super_operations ecryptfs_sops = { .destroy_inode = ecryptfs_destroy_inode, .free_inode = ecryptfs_free_inode, .statfs = ecryptfs_statfs, - .remount_fs = NULL, .evict_inode = ecryptfs_evict_inode, .show_options = ecryptfs_show_options }; diff --git a/fs/eventfd.c b/fs/eventfd.c index 76129bfcd663..af42b2c7d235 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -406,14 +406,13 @@ static int do_eventfd(unsigned int count, int flags) if (fd < 0) goto err; - file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx, flags); + file = anon_inode_getfile_fmode("[eventfd]", &eventfd_fops, + ctx, flags, FMODE_NOWAIT); if (IS_ERR(file)) { put_unused_fd(fd); fd = PTR_ERR(file); goto err; } - - file->f_mode |= FMODE_NOWAIT; fd_install(fd, file); return fd; err: diff --git 
a/fs/eventpoll.c b/fs/eventpoll.c index 7c0980db77b3..1fc770270ab8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -438,7 +438,7 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time) * * we must do our busy polling with irqs enabled */ -static bool ep_busy_loop(struct eventpoll *ep, int nonblock) +static bool ep_busy_loop(struct eventpoll *ep) { unsigned int napi_id = READ_ONCE(ep->napi_id); u16 budget = READ_ONCE(ep->busy_poll_budget); @@ -448,7 +448,7 @@ static bool ep_busy_loop(struct eventpoll *ep, int nonblock) budget = BUSY_POLL_BUDGET; if (napi_id >= MIN_NAPI_ID && ep_busy_loop_on(ep)) { - napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, + napi_busy_loop(napi_id, ep_busy_loop_end, ep, prefer_busy_poll, budget); if (ep_events_available(ep)) return true; @@ -560,7 +560,7 @@ static void ep_resume_napi_irqs(struct eventpoll *ep) #else -static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) +static inline bool ep_busy_loop(struct eventpoll *ep) { return false; } @@ -2047,7 +2047,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, if (timed_out) return 0; - eavail = ep_busy_loop(ep, timed_out); + eavail = ep_busy_loop(ep); if (eavail) continue; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 0c899cfba578..b5845c4846b8 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -126,10 +126,8 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, int err; parent = ERR_PTR(-EACCES); - inode_lock(dentry->d_inode); if (mnt->mnt_sb->s_export_op->get_parent) parent = mnt->mnt_sb->s_export_op->get_parent(dentry); - inode_unlock(dentry->d_inode); if (IS_ERR(parent)) { dprintk("get_parent of %lu failed, err %ld\n", diff --git a/fs/file.c b/fs/file.c index d868cdb95d1e..40fed4501aab 100644 --- a/fs/file.c +++ b/fs/file.c @@ -418,17 +418,25 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho old_fds = old_fdt->fd; new_fds = new_fdt->fd; + /* + * We may 
be racing against fd allocation from other threads using this + * files_struct, despite holding ->file_lock. + * + * alloc_fd() might have already claimed a slot, while fd_install() + * did not populate it yet. Note the latter operates locklessly, so + * the file can show up as we are walking the array below. + * + * At the same time we know no files will disappear as all other + * operations take the lock. + * + * Instead of trying to placate userspace racing with itself, we + * ref the file if we see it and mark the fd slot as unused otherwise. + */ for (i = open_files; i != 0; i--) { - struct file *f = *old_fds++; + struct file *f = rcu_dereference_raw(*old_fds++); if (f) { get_file(f); } else { - /* - * The fd may be claimed in the fd bitmap but not yet - * instantiated in the files array if a sibling thread - * is partway through open(). So make sure that this - * fd is available to the new process. - */ __clear_open_fd(open_files - i, new_fdt); } rcu_assign_pointer(*new_fds++, f); @@ -577,6 +585,7 @@ repeat: __set_open_fd(fd, fdt, flags & O_CLOEXEC); error = fd; + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); out: spin_unlock(&files->file_lock); @@ -612,22 +621,14 @@ void put_unused_fd(unsigned int fd) EXPORT_SYMBOL(put_unused_fd); -/* - * Install a file pointer in the fd array. - * - * The VFS is full of places where we drop the files lock between - * setting the open_fds bitmap and installing the file in the file - * array. At any such point, we are vulnerable to a dup2() race - * installing a file in the array before us. We need to detect this and - * fput() the struct file we are about to overwrite in this case. - * - * It should never happen - if we allow dup2() do it, _really_ bad things - * will follow. +/** + * fd_install - install a file pointer in the fd array + * @fd: file descriptor to install the file in + * @file: the file to install * * This consumes the "file" refcount, so callers should treat it * as if they had called fput(file). 
*/ - void fd_install(unsigned int fd, struct file *file) { struct files_struct *files = current->files; @@ -642,7 +643,7 @@ void fd_install(unsigned int fd, struct file *file) rcu_read_unlock_sched(); spin_lock(&files->file_lock); fdt = files_fdtable(files); - WARN_ON(fdt->fd[fd] != NULL); + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); return; @@ -650,7 +651,7 @@ void fd_install(unsigned int fd, struct file *file) /* coupled with smp_wmb() in expand_fdtable() */ smp_rmb(); fdt = rcu_dereference_sched(files->fdt); - BUG_ON(fdt->fd[fd] != NULL); + VFS_BUG_ON(rcu_access_pointer(fdt->fd[fd]) != NULL); rcu_assign_pointer(fdt->fd[fd], file); rcu_read_unlock_sched(); } @@ -679,7 +680,7 @@ struct file *file_close_fd_locked(struct files_struct *files, unsigned fd) return NULL; fd = array_index_nospec(fd, fdt->max_fds); - file = fdt->fd[fd]; + file = rcu_dereference_raw(fdt->fd[fd]); if (file) { rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); @@ -1182,6 +1183,16 @@ static inline bool file_needs_f_pos_lock(struct file *file) (file_count(file) > 1 || file->f_op->iterate_shared); } +bool file_seek_cur_needs_f_lock(struct file *file) +{ + if (!(file->f_mode & FMODE_ATOMIC_POS) && !file->f_op->iterate_shared) + return false; + + VFS_WARN_ON_ONCE((file_count(file) > 1) && + !mutex_is_locked(&file->f_pos_lock)); + return true; +} + struct fd fdget_pos(unsigned int fd) { struct fd f = fdget(fd); @@ -1230,14 +1241,34 @@ __releases(&files->file_lock) struct fdtable *fdt; /* - * We need to detect attempts to do dup2() over allocated but still - * not finished descriptor. + * dup2() is expected to close the file installed in the target fd slot + * (if any). However, userspace hand-picking a fd may be racing against + * its own threads which happened to allocate it in open() et al but did + * not populate it yet. 
+ * + * Broadly speaking we may be racing against the following: + * fd = get_unused_fd_flags(); // fd slot reserved, ->fd[fd] == NULL + * file = hard_work_goes_here(); + * fd_install(fd, file); // only now ->fd[fd] == file + * + * It is an invariant that a successfully allocated fd has a NULL entry + * in the array until the matching fd_install(). + * + * If we fit the window, we have the fd to populate, yet no target file + * to close. Trying to ignore it and install our new file would violate + * the invariant and make fd_install() overwrite our file. + * + * Things can be done(tm) to handle this. However, the issue does not + * concern legitimate programs and we only need to make sure the kernel + * does not trip over it. + * + * The simplest way out is to return an error if we find ourselves here. * * POSIX is silent on the issue, we return -EBUSY. */ fdt = files_fdtable(files); fd = array_index_nospec(fd, fdt->max_fds); - tofree = fdt->fd[fd]; + tofree = rcu_dereference_raw(fdt->fd[fd]); if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; get_file(file); diff --git a/fs/file_table.c b/fs/file_table.c index 5c00dc38558d..9f0a1a164c82 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -221,7 +221,8 @@ struct file *alloc_empty_file(int flags, const struct cred *cred) /* * Privileged users can go above max_files */ - if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { + if (unlikely(get_nr_files() >= files_stat.max_files) && + !capable(CAP_SYS_ADMIN)) { /* * percpu_counters are inaccurate. Do an expensive check before * we go and fail. diff --git a/fs/inode.c b/fs/inode.c index 5587aabdaa5e..99318b157a9a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -327,7 +327,17 @@ static void i_callback(struct rcu_head *head) free_inode_nonrcu(inode); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * alloc_inode - obtain an inode + * @sb: superblock + * + * Allocates a new inode for given superblock. 
+ * Inode wont be chained in superblock s_inodes list + * This means : + * - fs can't be unmount + * - quotas, fsnotify, writeback can't work + */ +struct inode *alloc_inode(struct super_block *sb) { const struct super_operations *ops = sb->s_op; struct inode *inode; @@ -613,18 +623,22 @@ static void inode_wait_for_lru_isolating(struct inode *inode) */ void inode_sb_list_add(struct inode *inode) { - spin_lock(&inode->i_sb->s_inode_list_lock); - list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); - spin_unlock(&inode->i_sb->s_inode_list_lock); + struct super_block *sb = inode->i_sb; + + spin_lock(&sb->s_inode_list_lock); + list_add(&inode->i_sb_list, &sb->s_inodes); + spin_unlock(&sb->s_inode_list_lock); } EXPORT_SYMBOL_GPL(inode_sb_list_add); static inline void inode_sb_list_del(struct inode *inode) { + struct super_block *sb = inode->i_sb; + if (!list_empty(&inode->i_sb_list)) { - spin_lock(&inode->i_sb->s_inode_list_lock); + spin_lock(&sb->s_inode_list_lock); list_del_init(&inode->i_sb_list); - spin_unlock(&inode->i_sb->s_inode_list_lock); + spin_unlock(&sb->s_inode_list_lock); } } @@ -806,23 +820,16 @@ static void evict(struct inode *inode) /* * Wake up waiters in __wait_on_freeing_inode(). * - * Lockless hash lookup may end up finding the inode before we removed - * it above, but only lock it *after* we are done with the wakeup below. - * In this case the potential waiter cannot safely block. + * It is an invariant that any thread we need to wake up is already + * accounted for before remove_inode_hash() acquires ->i_lock -- both + * sides take the lock and sleep is aborted if the inode is found + * unhashed. Thus either the sleeper wins and goes off CPU, or removal + * wins and the sleeper aborts after testing with the lock. * - * The inode being unhashed after the call to remove_inode_hash() is - * used as an indicator whether blocking on it is safe. + * This also means we don't need any fences for the call below. 
*/ - spin_lock(&inode->i_lock); - /* - * Pairs with the barrier in prepare_to_wait_event() to make sure - * ___wait_var_event() either sees the bit cleared or - * waitqueue_active() check in wake_up_var() sees the waiter. - */ - smp_mb__after_spinlock(); inode_wake_up_bit(inode, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); - spin_unlock(&inode->i_lock); destroy_inode(inode); } @@ -900,46 +907,6 @@ again: } EXPORT_SYMBOL_GPL(evict_inodes); -/** - * invalidate_inodes - attempt to free all inodes on a superblock - * @sb: superblock to operate on - * - * Attempts to free all inodes (including dirty inodes) for a given superblock. - */ -void invalidate_inodes(struct super_block *sb) -{ - struct inode *inode, *next; - LIST_HEAD(dispose); - -again: - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - spin_lock(&inode->i_lock); - if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { - spin_unlock(&inode->i_lock); - continue; - } - if (atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - inode->i_state |= I_FREEING; - inode_lru_list_del(inode); - spin_unlock(&inode->i_lock); - list_add(&inode->i_lru, &dispose); - if (need_resched()) { - spin_unlock(&sb->s_inode_list_lock); - cond_resched(); - dispose_list(&dispose); - goto again; - } - } - spin_unlock(&sb->s_inode_list_lock); - - dispose_list(&dispose); -} - /* * Isolate the inode from the LRU in preparation for freeing it. * @@ -1160,21 +1127,6 @@ unsigned int get_next_ino(void) EXPORT_SYMBOL(get_next_ino); /** - * new_inode_pseudo - obtain an inode - * @sb: superblock - * - * Allocates a new inode for given superblock. 
- * Inode wont be chained in superblock s_inodes list - * This means : - * - fs can't be unmount - * - quotas, fsnotify, writeback can't work - */ -struct inode *new_inode_pseudo(struct super_block *sb) -{ - return alloc_inode(sb); -} - -/** * new_inode - obtain an inode * @sb: superblock * @@ -1190,7 +1142,7 @@ struct inode *new_inode(struct super_block *sb) { struct inode *inode; - inode = new_inode_pseudo(sb); + inode = alloc_inode(sb); if (inode) inode_sb_list_add(inode); return inode; @@ -1348,8 +1300,8 @@ again: } if (set && unlikely(set(inode, data))) { - inode = NULL; - goto unlock; + spin_unlock(&inode_hash_lock); + return NULL; } /* @@ -1361,14 +1313,14 @@ again: hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); + spin_unlock(&inode_hash_lock); + /* * Add inode to the sb list if it's not already. It has I_NEW at this * point, so it should be safe to test i_sb_list locklessly. */ if (list_empty(&inode->i_sb_list)) inode_sb_list_add(inode); -unlock: - spin_unlock(&inode_hash_lock); return inode; } @@ -1497,8 +1449,8 @@ again: inode->i_state = I_NEW; hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); - inode_sb_list_add(inode); spin_unlock(&inode_hash_lock); + inode_sb_list_add(inode); /* Return the locked inode with I_NEW set, the * caller is responsible for filling in the contents @@ -2953,3 +2905,18 @@ umode_t mode_strip_sgid(struct mnt_idmap *idmap, return mode & ~S_ISGID; } EXPORT_SYMBOL(mode_strip_sgid); + +#ifdef CONFIG_DEBUG_VFS +/* + * Dump an inode. + * + * TODO: add a proper inode dumping routi |
