// SPDX-License-Identifier: GPL-2.0-only
/*
*
* Copyright (C) 2011 Novell Inc.
*/
#include <uapi/linux/magic.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/xattr.h>
#include <linux/mount.h>
#include <linux/parser.h>
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/posix_acl_xattr.h>
#include <linux/exportfs.h>
#include <linux/file.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include "overlayfs.h"
#include "params.h"
/* Module metadata: author, description and license. */
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Overlay filesystem");
MODULE_LICENSE("GPL");
/* Forward declaration only; the structure is not defined in this part of the file. */
struct ovl_dir_cache;
/*
 * ovl_d_real - ->d_real() operation: find the real dentry behind an overlay one
 * @dentry: overlay dentry
 * @inode:  inode the caller expects the returned dentry to have, or NULL
 *
 * Resolution order:
 *  - @dentry itself, when @inode is the overlay inode or @dentry is not a
 *    regular file;
 *  - the upper dentry, when its inode is @inode, or when @inode is NULL and
 *    the overlay inode has upper data;
 *  - whatever d_real() yields for the lowerdata dentry.
 *
 * If none of these matches @inode, a warning is emitted and @dentry is
 * returned.
 */
static struct dentry *ovl_d_real(struct dentry *dentry,
				 const struct inode *inode)
{
	struct dentry *real = NULL;
	struct dentry *lowerdata;

	/* The caller is asking about the overlay file itself */
	if (inode && inode == d_inode(dentry))
		return dentry;

	if (!d_is_reg(dentry)) {
		/* Non-regular files are only ever represented by @dentry */
		if (inode && inode != d_inode(dentry))
			goto bug;
		return dentry;
	}

	real = ovl_dentry_upper(dentry);
	if (real) {
		if (inode == d_inode(real))
			return real;
		if (!inode && ovl_has_upperdata(d_inode(dentry)))
			return real;
	}

	/*
	 * Best effort lazy lookup of lowerdata for the !inode case, so that
	 * the real lowerdata dentry can be returned.  The only current caller
	 * of d_real() with a NULL inode is d_real_inode() from trace_uprobe,
	 * and that caller is likely to be followed by reading from the file
	 * before uprobes are placed on an offset within it, so lowerdata
	 * should be available by the time the uprobe is set.
	 */
	if (ovl_verify_lowerdata(dentry))
		goto bug;

	lowerdata = ovl_dentry_lowerdata(dentry);
	if (!lowerdata)
		goto bug;

	/* Handle recursion: the lowerdata dentry may need resolving too */
	real = d_real(lowerdata, inode);
	if (inode && inode != d_inode(real))
		goto bug;

	return real;

bug:
	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
	     inode ? inode->i_ino : 0, real,
	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
	return dentry;
}
/*
 * ovl_revalidate_real - revalidate one real (upper or lower) dentry
 * @d:     real dentry, may be NULL
 * @flags: lookup flags passed through to the real filesystem's operation
 * @weak:  use ->d_weak_revalidate() instead of ->d_revalidate()
 *
 * Returns 1 when @d is NULL or does not have the relevant DCACHE_OP_* flag
 * set.  Otherwise returns the result of the real dentry operation, except
 * that a 0 result from ->d_revalidate() is converted to -ESTALE after
 * invalidating @d (the invalidation is skipped when LOOKUP_RCU is set).
 */
static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
{
	int valid;

	if (!d)
		return 1;

	if (weak) {
		if (!(d->d_flags & DCACHE_OP_WEAK_REVALIDATE))
			return 1;
		return d->d_op->d_weak_revalidate(d, flags);
	}

	if (!(d->d_flags & DCACHE_OP_REVALIDATE))
		return 1;

	valid = d->d_op->d_revalidate(d, flags);
	if (valid)
		return valid;

	/* The real dentry is no longer valid */
	if (!(flags & LOOKUP_RCU))
		d_invalidate(d);

	return -ESTALE;
}
/*
 * ovl_dentry_revalidate_common - revalidate all real dentries of an overlay dentry
 * @dentry: overlay dentry
 * @flags:  lookup flags
 * @weak:   select weak revalidation of the real dentries
 *
 * Revalidates the upper dentry (if any) and then each entry of the lower
 * stack in order, stopping at the first result that is not positive.
 *
 * Returns -ECHILD when @dentry has no inode, otherwise the last result
 * obtained from ovl_revalidate_real() (1 if nothing was revalidated).
 */
static int ovl_dentry_revalidate_common(struct dentry *dentry,
					unsigned int flags, bool weak)
{
	struct inode *inode = d_inode_rcu(dentry);
	struct ovl_path *lowerstack;
	struct ovl_entry *oe;
	struct dentry *upper;
	unsigned int idx;
	int ret = 1;

	/* Careful in RCU mode */
	if (!inode)
		return -ECHILD;

	oe = OVL_I_E(inode);
	lowerstack = ovl_lowerstack(oe);
	upper = ovl_i_dentry_upper(inode);

	if (upper) {
		ret = ovl_revalidate_real(upper, flags, weak);
		if (ret <= 0)
			return ret;
	}

	for (idx = 0; idx < ovl_numlower(oe); idx++) {
		ret = ovl_revalidate_real(lowerstack[idx].dentry, flags, weak);
		if (ret <= 0)
			break;
	}

	return ret;
}
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
return ovl_dentry_revalidate_common(dentry, flags, false);
}
static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
{
return ovl_dentry_revalidate_common(dentry, flags, true);
}
/* Dentry operations table wiring up the overlay callbacks defined above. */
static const struct dentry_operations ovl_dentry_operations = {
	/* Revalidation is forwarded to the real upper/lower dentries */
	.d_revalidate		= ovl_dentry_revalidate,
	.d_weak_revalidate	= ovl_dentry_weak_revalidate,
	/* Maps an overlay dentry to the real dentry behind it */
	.d_real			= ovl_d_real,
};
/* Slab cache from which struct ovl_inode objects are allocated and freed. */
static struct kmem_cache *ovl_inode_cachep;

/*
 * ovl_alloc_inode - allocate and reset an overlay inode
 * @sb: superblock the inode is allocated for
 *
 * Allocates a struct ovl_inode from ovl_inode_cachep, clears the overlay
 * specific fields and initialises its lock.
 *
 * Returns the embedded VFS inode, or NULL if the allocation failed.
 */
static struct inode *ovl_alloc_inode(struct super_block *sb)
{
	struct ovl_inode *oi;

	oi = alloc_inode_sb(sb, ovl_inode_cachep, GFP_KERNEL);
	if (!oi)
		return NULL;

	/* Pointer members start out empty */
	oi->cache = NULL;
	oi->redirect = NULL;
	oi->__upperdentry = NULL;
	oi->lowerdata_redirect = NULL;
	oi->oe = NULL;

	/* Scalar state */
	oi->version = 0;
	oi->flags = 0;

	mutex_init(&oi->lock);

	return &oi->vfs_inode;
}
/*
 * ovl_free_inode - release the memory held by an overlay inode
 * @inode: VFS inode embedded in the struct ovl_inode being freed
 *
 * Frees the redirect string and the overlay entry, destroys the inode lock
 * and hands the object back to ovl_inode_cachep.
 */
static void ovl_free_inode(struct inode *inode)
{
	struct ovl_inode *oi;

	oi = OVL_I(inode);

	kfree(oi->redirect);
	kfree(oi->oe);
	mutex_destroy(&oi->lock);

	/* Must come last: everything above dereferences oi */
	kmem_cache_free(ovl_inode_cachep, oi);
}
/*
 * ovl_destroy_inode - drop what an overlay inode holds on to
 * @inode: VFS inode embedded in the struct ovl_inode being destroyed
 *
 * Puts the upper dentry, releases the lower stack through ovl_stack_put(),
 * and then frees the per-type data: the directory cache for directories,
 * the lowerdata redirect string for everything else.
 */
static void ovl_destroy_inode(struct inode *inode)
{
	struct ovl_inode *oi = OVL_I(inode);

	dput(oi->__upperdentry);
	ovl_stack_put(ovl_lowerstack(oi->oe), ovl_numlower(oi->oe));

	/*
	 * NOTE(review): presumably the dir cache and lowerdata_redirect are
	 * mutually exclusive by inode type -- confirm against struct ovl_inode.
	 */
	if (!S_ISDIR(inode->i_mode))
		kfree(oi->lowerdata_redirect);
	else
		ovl_dir_cache_free(inode);
}
/*
 * ovl_put_super - free the overlay filesystem info attached to a superblock
 * @sb: superblock being released
 *
 * Does nothing when the superblock has no overlay fs info.
 */
static void ovl_put_super(struct super_block *sb)
{
	struct ovl_fs *ofs = OVL_FS(sb);

	if (!ofs)
		return;

	ovl_free_fs(ofs);
}
/*
 * Sync real dirty inodes in upper filesystem (if it exists).
 *
 * @sb:   overlay superblock
 * @wait: non-zero when the caller wants to wait for the sync to complete
 *
 * Returns -EIO when ovl_sync_status() reports an error, 0 when it reports
 * zero or when @wait is zero, and otherwise the result of sync_filesystem()
 * on the upper superblock.
 */
static int ovl_sync_fs(struct super_block *sb, int wait)
{
	struct ovl_fs *ofs = OVL_FS(sb);
	struct super_block *upper_sb;
	int err;

	err = ovl_sync_status(ofs);
	if (err < 0) {
		/*
		 * The error must always be recorded here: syncfs does not
		 * check our return value, the failure is instead reported
		 * indirectly through the sb's writeback errseq, which the
		 * VFS inspects after this call.
		 */
		errseq_set(&sb->s_wb_err, -EIO);
		return -EIO;
	}

	/*
	 * A zero sync status means there is nothing to do.
	 *
	 * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
	 * All the super blocks will be iterated, including upper_sb.
	 *
	 * For a syncfs(2) call we do need sync_filesystem() on upper_sb, but
	 * it is enough to do that when called with wait == 1.
	 */
	if (!err || !wait)
		return 0;

	upper_sb = ovl_upper_mnt(ofs)->mnt_sb;

	/* sync_filesystem() is called with the upper s_umount held for read */
	down_read(&upper_sb->s_umount);
	err = sync_filesystem(upper_sb);
	up_read(&upper_sb->s_umount);

	return err;
}
/**
* ovl_statfs
* @dentry: The dentry to query
* @buf: The struct kstatfs to fill in with stats
*
* Get the filesystem statistics. As writes always target the upper layer
* filesystem pass the statfs to the upper filesystem (if it exists)
*/
static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct ovl_fs *ofs = OVL_FS(sb);
struct dentry *root_dentry = sb->s_root;
struct path path;
int err;
ovl_path_real(root_dentry, &path);
err = vfs_statfs(&path, buf);
if (!err) {
buf->f_namelen = ofs->namelen;
buf->f_type = OVERLAYFS_SUPER_M
|