// SPDX-License-Identifier: GPL-2.0-only
/*
* Functions to manage eBPF programs attached to cgroups
*
* Copyright (c) 2016 Daniel Mack
*/
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>
#include "../cgroup/cgroup-internal.h"
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
void cgroup_bpf_offline(struct cgroup *cgrp)
{
cgroup_get(cgrp);
percpu_ref_kill(&cgrp->bpf.refcnt);
}
/**
* cgroup_bpf_release() - put references of all bpf programs and
* release all cgroup bpf data
* @work: work structure embedded into the cgroup to modify
*/
static void cgroup_bpf_release(struct work_struct *work)
{
struct cgroup *p, *cgrp = container_of(work, struct cgroup,
bpf.release_work);
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *old_array;
unsigned int type;
mutex_lock(&cgroup_mutex);
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog_list *pl, *tmp;
list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node);
bpf_prog_put(pl->prog);
for_each_cgroup_storage_type(stype) {
bpf_cgroup_storage_unlink(pl->storage[stype]);
bpf_cgroup_storage_free(pl->storage[stype]);
}
kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
old_array = rcu_dereference_protected(
cgrp->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
bpf_prog_array_free(old_array);
}
mutex_unlock(&cgroup_mutex);
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
/**
* cgroup_bpf_release_fn() - callback used to schedule releasing
* of bpf cgroup data
* @ref: percpu ref counter structure
*/
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
queue_work(system_wq, &cgrp->bpf.release_work);
}
/* count number of elements in the list.
* it's slow but the list cannot be long
*/
static u32 prog_list_length(struct list_head *head)
{
struct bpf_prog_list *pl;
u32 cnt = 0;
list_for_each_entry(pl, head, node) {
if (!pl->prog)
continue;
cnt++;
}
return cnt;
}
/* if parent has non-overridable prog attached,
* disallow attaching new programs to the descendent cgroup.
* if parent has overridable or multi-prog, allow attaching
*/
static bool hierarchy_allows_attach(struct cgroup *cgrp,
enum bpf_attach_type type,
u32 new_flags)
{
struct cgroup *p;
p = cgroup_parent(cgrp);
if (!p)
return true;
do {
u32 flags = p->bpf.flags[type];
u32 cnt;
if (flags & BPF_F_ALLOW_MULTI)
return true;
cnt = prog_list_length(&p->bpf.progs[type]);
WARN_ON_ONCE(cnt > 1);
if (cnt == 1)
return !!(flags & BPF_F_ALLOW_OVERRIDE);
p = cgroup_parent(p);
} while (p);
return true;
}
/* compute a chain of effective programs for a given cgroup:
* start from the list of programs in this cgroup and add
* all parent programs.
* Note that parent's F_ALLOW_OVERRIDE-type program is yielding
* to programs in this cgroup
*/
static int compute_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array **array)
{
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
struct cgroup *p = cgrp;
int cnt = 0;
/* count number of effective programs by walking parents */
do {
if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
cnt += prog_list_length(&p->bpf.progs[type]);
p = cgroup_parent(p);
} while (p);
progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
if (!progs)
return -ENOMEM;
/* populate the array with effective progs */
cnt = 0;
p = cgrp;
do {
if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
continue;
list_for_each_entry(pl, &p->bpf.progs[type], node) {
if (!pl->prog)
continue;
progs->items[cnt].prog = pl->prog;
for_each_cgroup_storage_type(stype)
progs->items[cnt].cgroup_storage[stype] =
pl->storage[stype];
cnt++;
}
} while ((p = cgroup_parent(p)));
*array = progs;
return 0;
}
static void activate_effective_progs(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_prog_array *old_array)
{
rcu_swap_protected(cgrp->bpf.effective[type], old_array,
lockdep_is_held(&cgroup_mutex));
/* free prog array after grace period, since __cgroup_bpf_run_*()
* might be still walking the array
*/
bpf_prog_array_free(old_array);
}
/**
* cgroup_bpf_inherit() - inherit effective programs from parent
* @cgrp: the cgroup to modify
*/
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use marco instead of const int, since compiler thinks
* that array below is variable length
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
struct bpf_prog_array *arrays[NR] = {};
struct cgroup *p;
int ret, i;
ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
GFP_KERNEL);
if (ret)
return ret;
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_get(p);
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
for (i = 0; i < NR; i++)
if (compute_effective_progs(cgrp, i, &arrays[i]))
goto cleanup;
for (i = 0; i < NR; i++)
activate_effective_progs(cgrp, i, arrays[i]);
return 0;
cleanup:
for (i = 0; i < NR; i++)
bpf_prog_array_free(arrays[i]);
for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
cgroup_bpf_put(p);
percpu_ref_exit(
|