// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/iversion.h>
#include "super.h"
#include "mds_client.h"
#include <linux/ceph/decode.h>
/* unused map expires after 5 minutes */
#define CEPH_SNAPID_MAP_TIMEOUT (5 * 60 * HZ)
/*
* Snapshots in ceph are driven in large part by cooperation from the
* client. In contrast to local file systems or file servers that
* implement snapshots at a single point in the system, ceph's
* distributed access to storage requires clients to help decide
* whether a write logically occurs before or after a recently created
* snapshot.
*
* This provides a perfect instantaneous client-wide snapshot. Between
* clients, however, snapshots may appear to be applied at slightly
* different points in time, depending on delays in delivering the
* snapshot notification.
*
* Snapshots are _not_ file system-wide. Instead, each snapshot
* applies to the subdirectory nested beneath some directory. This
* effectively divides the hierarchy into multiple "realms," where all
* of the files contained by each realm share the same set of
* snapshots. An individual realm's snap set contains snapshots
* explicitly created on that realm, as well as any snaps in its
* parent's snap set _after_ the point at which the parent became its
* parent (due to, say, a rename). Similarly, snaps from prior parents
* during the time intervals during which they were the parent are included.
*
* The client is spared most of this detail, fortunately... it must only
* maintain a hierarchy of realms reflecting the current parent/child
* realm relationship, and for each realm has an explicit list of snaps
* inherited from prior parents.
*
* A snap_realm struct is maintained for realms containing every inode
* with an open cap in the system. (The needed snap realm information is
* provided by the MDS whenever a cap is issued, i.e., on open.) A 'seq'
* version number is used to ensure that as realm parameters change (new
* snapshot, new parent, etc.) the client's realm hierarchy is updated.
*
* The realm hierarchy drives the generation of a 'snap context' for each
* realm, which simply lists the resulting set of snaps for the realm. This
* is attached to any writes sent to OSDs.
*/
/*
* Unfortunately error handling is a bit mixed here. If we get a snap
* update, but don't have enough memory to update our realm hierarchy,
* it's not clear what we can do about it (besides complaining to the
* console).
*/
/*
 * Take a reference on @realm.
 *
 * The caller must hold snap_rwsem.
 */
void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
			 struct ceph_snap_realm *realm)
{
	lockdep_assert_held(&mdsc->snap_rwsem);

	/*
	 * Both the 0->1 and the 1->0 transitions of nref have to happen
	 * under snap_empty_lock, atomically with the refcount change.
	 * When nref is already non-zero a plain lockless increment is
	 * enough; only when that fails (nref was 0) do we fall back to
	 * the locked path below.
	 */
	if (!atomic_inc_not_zero(&realm->nref)) {
		spin_lock(&mdsc->snap_empty_lock);
		/*
		 * Unlink empty_item only if this increment is the one
		 * that actually took nref from 0 to 1.
		 */
		if (atomic_inc_return(&realm->nref) == 1)
			list_del_init(&realm->empty_item);
		spin_unlock(&mdsc->snap_empty_lock);
	}
}
/*
 * Link @new into the realm rbtree rooted at @root, keyed by ino.
 *
 * A realm with the same ino must not already be in the tree; hitting
 * one triggers BUG().
 */
static void __insert_snap_realm(struct rb_root *root,
				struct ceph_snap_realm *new)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;

	/* walk down to the empty slot where @new belongs */
	while (*link) {
		struct ceph_snap_realm *cur;

		parent = *link;
		cur = rb_entry(parent, struct ceph_snap_realm, node);

		if (new->ino == cur->ino)
			BUG();		/* duplicate ino */
		else if (new->ino < cur->ino)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	/* hook the node in at the slot found, then rebalance */
	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, root);
}
/*
 * Allocate a realm for @ino, insert it into mdsc->snap_realms and hand
 * it back holding one reference on behalf of the caller.
 *
 * Returns the new realm, or ERR_PTR(-ENOMEM) if the allocation fails.
 *
 * The caller must hold snap_rwsem for write.
 */
static struct ceph_snap_realm *ceph_create_snap_realm(
	struct ceph_mds_client *mdsc,
	u64 ino)
{
	struct ceph_snap_realm *realm;

	lockdep_assert_held_write(&mdsc->snap_rwsem);

	realm = kzalloc(sizeof(*realm), GFP_NOFS);
	if (realm == NULL)
		return ERR_PTR(-ENOMEM);

	realm->ino = ino;
	atomic_set(&realm->nref, 1);	/* the caller's reference */

	/* list heads and the lock guarding inodes_with_caps */
	INIT_LIST_HEAD(&realm->children);
	INIT_LIST_HEAD(&realm->child_item);
	INIT_LIST_HEAD(&realm->empty_item);
	INIT_LIST_HEAD(&realm->dirty_item);
	INIT_LIST_HEAD(&realm->inodes_with_caps);
	spin_lock_init(&realm->inodes_with_caps_lock);

	/* register the realm in the per-mdsc tree and count it */
	__insert_snap_realm(&mdsc->snap_realms, realm);
	mdsc->num_snap_realms += 1;

	dout("create_snap_realm %llx %p\n", realm->ino, realm);
	return realm;
}
/*
* lookup the realm rooted at @ino.
*
* caller must hold snap_rwsem.
*/
static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc,
u64 ino)
{
struct rb_node *n = mdsc->snap_realms.rb_node;
struct ceph_snap_realm *r;
lockdep_assert_held(&mdsc->snap_rwsem);
while (n) {
r = rb_entry(n, struct ceph_snap_realm, node);
if (ino < r->ino)
n = n->rb_left;
else if (ino > r->ino)
n =