// SPDX-License-Identifier: GPL-2.0
/*
* dax: direct host memory access
* Copyright (C) 2020 Red Hat, Inc.
*/
#include "fuse_i.h"
#include <linux/delay.h>
#include <linux/dax.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/pfn_t.h>
#include <linux/iomap.h>
#include <linux/interval_tree.h>
/*
* Default memory range size: 1 << FUSE_DAX_SHIFT bytes (2 MiB). A power of 2 so
* it agrees with common FUSE_INIT map_alignment values 4KB and 64KB.
*/
#define FUSE_DAX_SHIFT 21
#define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT)
/* Number of PAGE_SIZE pages covered by one memory range */
#define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE)
/* Number of ranges the reclaimer will try to free in one invocation */
#define FUSE_DAX_RECLAIM_CHUNK (10)
/*
* DAX memory reclaim threshold, as a percentage of the total number of ranges.
* When the number of free ranges drops below this threshold, reclaim can be
* triggered. Default is 20%.
*/
#define FUSE_DAX_RECLAIM_THRESHOLD (20)
/*
* Translation information for file offsets to DAX window offsets. One instance
* describes a single memory range of the DAX window.
*/
struct fuse_dax_mapping {
/* Pointer to inode where this memory range is mapped */
struct inode *inode;
/* Will connect in fcd->free_ranges to keep track of free memory */
struct list_head list;
/*
* Node for the interval tree in the file/inode; node_to_dmap() converts a
* pointer to this member back into the enclosing mapping.
*/
struct interval_tree_node itn;
/* Will connect in fcd->busy_ranges to keep track of busy memory */
struct list_head busy_list;
/* Position in DAX window */
u64 window_offset;
/* Length of mapping, in bytes */
loff_t length;
/* Is this mapping read-only or read-write */
bool writable;
/* Reference count held while the mapping is used by dax iomap */
refcount_t refcnt;
};
/* Per-inode dax map: the set of DAX mappings that belong to one inode */
struct fuse_inode_dax {
/* Semaphore to protect modifications to the dmap tree */
struct rw_semaphore sem;
/* Sorted rb tree of struct fuse_dax_mapping elements */
struct rb_root_cached tree;
/*
* NOTE(review): presumably the number of mappings currently in @tree --
* confirm against the code that updates it.
*/
unsigned long nr;
};
/* Connection-wide DAX state: the DAX device and its free/busy memory ranges */
struct fuse_conn_dax {
/* DAX device */
struct dax_device *dev;
/* Lock protecting accesses to members of this structure */
spinlock_t lock;
/*
* List of memory ranges which are busy.
* NOTE(review): nr_busy_ranges is presumably the number of entries on
* busy_ranges -- confirm against the code that updates it.
*/
unsigned long nr_busy_ranges;
struct list_head busy_ranges;
/* Worker to free up memory ranges */
struct delayed_work free_work;
/* Wait queue for a dax range to become free */
wait_queue_head_t range_waitq;
/*
* DAX Window Free Ranges: nr_free_ranges is the number of free ranges and is
* compared against the reclaim threshold; free_ranges is the list itself.
*/
long nr_free_ranges;
struct list_head free_ranges;
/* Total number of ranges; the reclaim threshold is a percentage of this */
unsigned long nr_ranges;
};
/*
 * Convert an interval tree node into the fuse_dax_mapping that embeds it
 * (through the mapping's 'itn' member). A NULL node yields NULL, so callers
 * can pass a lookup result straight through without checking it first.
 */
static inline struct fuse_dax_mapping *
node_to_dmap(struct interval_tree_node *node)
{
	return node ? container_of(node, struct fuse_dax_mapping, itn) : NULL;
}
/*
* Forward declaration only (no body here), so that code placed before the
* definition can call it. Takes the connection-wide DAX state and an inode and
* returns a pointer to a fuse_dax_mapping.
* NOTE(review): the definition is outside this view; presumably it obtains a
* memory range for @inode, reclaiming one if none is free -- confirm against
* the definition.
*/
static struct fuse_dax_mapping *
alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode);
static void
__kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms)
{
unsigned long free_threshold;
/* If number of free ranges are below threshold, start reclaim */
free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100,
1);
if (fcd->nr_free_ranges < free_threshold)
queue_delayed_work(system_long_wq, &fcd->free_work,