// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf_ids.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
/* Bitmask made of BPF_F_NO_PREALLOC and BPF_F_CLONE.
* NOTE(review): presumably the set of flags accepted at map creation;
* the user of this mask is not visible here - confirm.
*/
#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
/* NOTE(review): presumably defines the "sk_cache" bookkeeping from which
* a map's cache_idx is handed out - confirm against the definition of
* DEFINE_BPF_STORAGE_CACHE().
*/
DEFINE_BPF_STORAGE_CACHE(sk_cache);
/* One bucket of a bpf_local_storage_map.
* select_bucket() picks a bucket by hashing the elem pointer.
*/
struct bpf_local_storage_map_bucket {
struct hlist_head list; /* Head of this bucket's hlist */
raw_spinlock_t lock; /* The "bucket lock" needed to link/unlink an elem */
};
/* The map is not the primary owner of a bpf_local_storage_elem.
* Instead, the container object (eg. sk->sk_bpf_storage) is.
*
* The map (bpf_local_storage_map) is for two purposes
* 1. Define the size of the "local storage". It is
* the map's value_size.
*
* 2. Maintain a list to keep track of all elems such
* that they can be cleaned up during the map destruction.
*
* When a bpf local storage is being looked up for a
* particular object, the "bpf_map" pointer is actually used
* as the "key" to search in the list of elem in
* the respective bpf_local_storage owned by the object.
*
* e.g. sk->sk_bpf_storage is the mini-map with the "bpf_map" pointer
* as the searching key.
*/
struct bpf_local_storage_map {
struct bpf_map map; /* Embedded generic map; map.ops supplies the
* charge/uncharge/owner-storage callbacks.
*/
/* Lookup elem does not require accessing the map.
*
* Updating/Deleting requires a bucket lock to
* link/unlink the elem from the map. Having
* multiple buckets reduces lock contention.
*/
struct bpf_local_storage_map_bucket *buckets;
u32 bucket_log; /* Number of hash bits used by select_bucket() */
u16 elem_size; /* Bytes allocated (and charged) for each elem */
u16 cache_idx; /* This map's slot in bpf_local_storage->cache[] */
};
/* The per-map payload of an elem: the owning map pointer followed by
* the value bytes.
*/
struct bpf_local_storage_data {
/* smap is used as the searching key when looking up
* from the object's bpf_local_storage.
*
* Put it in the same cacheline as the data to minimize
* the number of cachelines accessed during the cache hit case.
*/
struct bpf_local_storage_map __rcu *smap;
/* Value bytes; bpf_selem_alloc() copies smap->map.value_size
* bytes in here when an initial value is given.
*/
u8 data[] __aligned(8);
};
/* One storage element. It is linked to both a bpf_local_storage
* (through snode) and a bpf_local_storage_map (through map_node).
*/
struct bpf_local_storage_elem {
struct hlist_node map_node; /* Linked to bpf_local_storage_map */
struct hlist_node snode; /* Linked to bpf_local_storage */
struct bpf_local_storage __rcu *local_storage; /* Storage this elem is on */
struct rcu_head rcu; /* Used by kfree_rcu() when the elem is freed */
/* 8 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines accessed during a cache hit.
*/
struct bpf_local_storage_data sdata ____cacheline_aligned;
};
/* SELEM(): map a pointer to the embedded sdata member back to the
* bpf_local_storage_elem that contains it.
*/
#define SELEM(_SDATA) \
container_of((_SDATA), struct bpf_local_storage_elem, sdata)
/* SDATA(): address of the sdata member embedded in an elem. */
#define SDATA(_SELEM) (&(_SELEM)->sdata)
/* The per-object container of storage elems (e.g. what
* sk->sk_bpf_storage points to).
*/
struct bpf_local_storage {
/* Per-map shortcut to an elem's sdata, indexed by smap->cache_idx. */
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu; /* For kfree_rcu() of this bpf_local_storage */
raw_spinlock_t lock; /* Protect adding/removing from the "list" */
};
/* Return the bucket of @smap that @selem falls into.
 *
 * The elem's address is hashed down to smap->bucket_log bits and the
 * result is used as an offset into smap->buckets.
 */
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
	      struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_map_bucket *first = smap->buckets;

	return first + hash_ptr(selem, smap->bucket_log);
}
/* Account @size bytes against @sk's option memory.
 *
 * Applies the same limit check as sock_kmalloc(): the request itself
 * must not exceed sysctl_optmem_max, and the already-charged amount plus
 * the request must stay below it.
 *
 * Returns 0 after adding @size to sk->sk_omem_alloc, or -ENOMEM when
 * either limit check fails (nothing is charged in that case).
 */
static int omem_charge(struct sock *sk, unsigned int size)
{
	if (size > sysctl_optmem_max)
		return -ENOMEM;

	if (atomic_read(&sk->sk_omem_alloc) + size >= sysctl_optmem_max)
		return -ENOMEM;

	atomic_add(size, &sk->sk_omem_alloc);
	return 0;
}
/* Charge @size bytes to @owner through the map's
 * map_local_storage_charge callback.
 *
 * Returns the callback's result, or 0 when the map provides no
 * charge callback.
 */
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	if (smap->map.ops->map_local_storage_charge)
		return smap->map.ops->map_local_storage_charge(smap, owner,
							       size);

	return 0;
}
/* Give back @size bytes previously charged to @owner through the map's
 * map_local_storage_uncharge callback. Does nothing when the map
 * provides no such callback.
 */
static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
			 u32 size)
{
	if (!smap->map.ops->map_local_storage_uncharge)
		return;

	smap->map.ops->map_local_storage_uncharge(smap, owner, size);
}
/* Return the location inside @owner that holds its bpf_local_storage
 * pointer, as reported by the map's map_owner_storage_ptr callback.
 * The callback is invoked unconditionally.
 */
static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	return smap->map.ops->map_owner_storage_ptr(owner);
}
/* True when @selem's snode is currently hashed, i.e. the elem is still
 * on a bpf_local_storage list.
 */
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
	const struct hlist_node *storage_node = &selem->snode;

	return !hlist_unhashed(storage_node);
}
/* True when @selem's map_node is currently hashed, i.e. the elem is
 * still on a bpf_local_storage_map bucket list.
 */
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
	const struct hlist_node *bucket_node = &selem->map_node;

	return !hlist_unhashed(bucket_node);
}
/* Allocate a zeroed elem of smap->elem_size bytes for @smap.
 *
 * @smap:       map the elem is sized for
 * @owner:      object the memory is charged to when @charge_mem is set
 * @value:      optional initial value; when non-NULL, smap->map.value_size
 *              bytes are copied into the elem's data area
 * @charge_mem: charge smap->elem_size to @owner before allocating
 *
 * The allocation uses GFP_ATOMIC | __GFP_NOWARN.
 *
 * Returns the new elem, or NULL when charging or the allocation fails.
 * A charge taken here is given back if the allocation fails.
 */
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
		void *value, bool charge_mem)
{
	struct bpf_local_storage_elem *new_elem;

	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
		return NULL;

	new_elem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
	if (!new_elem) {
		/* Undo the charge taken above. */
		if (charge_mem)
			mem_uncharge(smap, owner, smap->elem_size);
		return NULL;
	}

	if (value)
		memcpy(SDATA(new_elem)->data, value, smap->map.value_size);

	return new_elem;
}
/* Unlink selem from local_storage's list and hand it to kfree_rcu().
*
* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure SDATA(selem)->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
*
* If uncharge_mem is true, smap->elem_size is uncharged from the owner.
*
* Returns true when selem was the only node on local_storage->list.
* In that case the owner's storage pointer has been cleared here and
* the caller is expected to kfree_rcu() local_storage after it has
* released local_storage->lock.
*/
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem,
bool uncharge_mem)
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
void *owner;
smap = rcu_dereference(SDATA(selem)->smap);
owner = local_storage->owner;
/* All uncharging on the owner must be done first.
* The owner may be freed once the last selem is unlinked
* from local_storage.
*/
if (uncharge_mem)
mem_uncharge(smap, owner, smap->elem_size);
/* Is selem the last elem left on this local_storage? */
free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
if (free_local_storage) {
/* The bpf_local_storage itself is uncharged here regardless
* of uncharge_mem.
*/
mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
local_storage->owner = NULL;
/* After this RCU_INIT, owner may be freed and cannot be used */
RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
/* local_storage is not freed now. local_storage->lock is
* still held and raw_spin_unlock_bh(&local_storage->lock)
* will be done by the caller.
*
* Although the unlock will be done under
* rcu_read_lock(), it is more intuitive to
* read if kfree_rcu(local_storage, rcu) is done
* after the raw_spin_unlock_bh(&local_storage->lock).
*
* Hence, a "bool free_local_storage" is returned
* to the caller which then calls the kfree_rcu()
* after unlock.
*/
}
hlist_del_init_rcu(&selem->snode);
/* Drop the cache slot if it still points at this elem's sdata. */
if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
SDATA(selem))
RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
kfree_rcu(selem, rcu);
return free_local_storage;
}
static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
if (unlikely(!selem_linked_to_storage(selem)))
/* selem has alr
|