// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/super.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* super.c contains code to handle: - mount structures
* - super-block tables
* - filesystem drivers list
* - mount system call
* - umount system call
* - ustat system call
*
* GK 2/5/95 - Changed to support mounting the root fs via NFS
*
* Added kerneld support: Jacques Gelinas and Bjorn Ekwall
* Added change_root: Werner Almesberger & Hans Lermen, Feb '96
* Added options to /proc/mounts:
* Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
* Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
* Heavily rewritten for 'one fs - one tree' dcache architecture. AV, Mar 2000
*/
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/writeback.h> /* for the emergency remount stuff */
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/backing-dev.h>
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
#include <linux/lockdep.h>
#include <linux/user_namespace.h>
#include "internal.h"
static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);
static char *sb_writers_name[SB_FREEZE_LEVELS] = {
"sb_writers",
"sb_pagefaults",
"sb_internal",
};
/*
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
* shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct super_block *sb;
long fs_objects = 0;
long total_objects;
long freed = 0;
long dentries;
long inodes;
sb = container_of(shrink, struct super_block, s_shrink);
/*
* Deadlock avoidance. We may hold various FS locks, and we don't want
* to recurse into the FS that called us in clear_inode() and friends..
*/
if (!(sc->gfp_mask & __GFP_FS))
return SHRINK_STOP;
if (!trylock_super(sb))
return SHRINK_STOP;
if (sb->s_op->nr_cached_objects)
fs_objects = sb->s_op->nr_cached_objects(sb, sc);
inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
total_objects = dentries + inodes + fs_objects + 1;
if (!total_objects)
total_objects = 1;
/* proportion the scan between the caches */
dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
/*
* prune the dcache first as the icache is pinned by it, then
* prune the icache, followed by the filesystem specific caches
*
* Ensure that we always scan at least one object - memcg kmem
* accounting uses this to fully empty the caches.
*/
sc->nr_to_scan = dentries + 1;
freed = prune_dcache_sb(sb, sc);
sc->nr_to_scan = inodes + 1;
freed += prune_icache_sb(sb, sc);
if (fs_objects) {
sc->nr_to_scan = fs_objects + 1;
freed += sb->s_op->free_cached_objects(sb, sc);
}
up_read(&sb->s_umount);
return freed;
}
static unsigned long super_cache_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct super_block *sb;
long total_objects = 0;
sb = container_of(shrink, struct super_block, s_shrink);
/*
* Don't call trylock_super as it is a potential
* scalability bottleneck. The counts could get updated
* between super_cache_count and super_cache_scan anyway.
* Call to super_cache_count with shrinker_rwsem held
* ensures the safety of call to list_lru_shrink_count() and
* s_op->nr_cached_objects().
*/
if (sb->s_op && sb->s_op->nr_cached_objects)
total_objects = sb->s_op->nr_cached_objects(sb, sc);
total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
total_objects = vfs_pressure_ratio(total_objects);
return total_objects;
}
static void destroy_super_work(struct work_struct *work)
{
struct super_block *s = container_of(work, struct super_block,
destroy_work);
int i;
for (i = 0; i < SB_FREEZE_LEVELS; i++)
percpu_free_rwsem(&s->s_writers.rw_sem[i]);
kfree(s);
}
static void destroy_super_rcu(struct rcu_head *head)
{
struct super_block *s = container_of(head, struct super_block, rcu);
INIT_WORK(&s->destroy_work, destroy_super_work);
schedule_work(&s->destroy_work);
}
/* Free a superblock that has never been seen by anyone */
static void destroy_unused_super(struct super_block *s)
{
if (!s)
return;
up_write(&s->s_umount);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
put_user_ns(s->s_user_ns);
kfree(s->s_subtype);
/* no delays needed */
destroy_super_work(&s->destroy_work);
}
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
* @flags: the mount flags
* @user_ns: User namespace for the super_block
*
* Allocates and initializes a new &struct super_block. alloc_super()
* returns a pointer new superblock or %NULL if allocation had failed.
*/
static struct super_block *alloc_super(struct file_system_type *type, int flags,
struct user_namespace *user_ns)
{
struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
static const struct super_operations default_op;
int i;
if (!s)
return NULL;
INIT_LIST_HEAD(&s->s_mounts);
s->s_user_ns = get_user_ns(user_ns);
if (security_sb_alloc(s))
goto fail;
for (i = 0; i < SB_FREEZE_LEVELS; i++) {
if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
sb_writers_name[i],
&type->s_writers_key[i]))
goto fail;
}
init_waitqueue_head(&s->s_writers.wait_unfrozen);
s->s_bdi = &noop_backing_dev_info;
s->s_flags = flags;
if (s->s_user_ns != &init_user_ns)
s->s_iflags |= SB_I_NODEV;
INIT_HLIST_NODE(&s->s_instances);
INIT_HLIST_BL_HEAD(&s->s_anon);
mutex_init(&s->s_sync_lock);
INIT_LIST_HEAD(&s->s_inodes);
spin_lock_init(&s->s_inode_list_lock);
INIT_LIST_HEAD(&s->s_inodes_wb);
spin_lock_init(&s->s_inode_wblist_lock);
if (list_lru_init_memcg(&s->s_dentry_lru))
goto fail;
if (list_lru_init_memcg(&s->s_inode_lru))
goto fail;
init_rwsem(&s->s_umount);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
/*
* sget() can have s_umount recursion.
*
* When it cannot find a suitable sb, it allocates a new
* one (this one), and tries again to find a suitable old
* one.
*
* In case that succeeds, it will acquire the s_umount
* lock of the old one. Since these are clearly distrinct
* locks, and this object isn't exposed yet, there's no
* risk of deadlocks.
*
* Annotate this by putting this lock in a different
* subclass.
*/
down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
s->s_count = 1;
atomic_set(&s->s_active, 1);
mutex_init(&s->s_vfs_rename_mutex);
lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
init_rwsem(&s->s_dquot.dqio_sem);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
s->cleancache_pooli
|