diff options
Diffstat (limited to 'kernel')
82 files changed, 4175 insertions, 1398 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index ebdb0043203a..84d882f3e299 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -225,7 +225,7 @@ config ARCH_SUPPORTS_ATOMIC_RMW config MUTEX_SPIN_ON_OWNER def_bool y - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW + depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW config RWSEM_SPIN_ON_OWNER def_bool y diff --git a/kernel/audit.c b/kernel/audit.c index f1ca11613379..67b9fbd871be 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -126,7 +126,7 @@ static atomic_t audit_lost = ATOMIC_INIT(0); /* The netlink socket. */ static struct sock *audit_sock; -static int audit_net_id; +static unsigned int audit_net_id; /* Hash for inode-based rules */ struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS]; @@ -1172,9 +1172,8 @@ static void __net_exit audit_net_exit(struct net *net) audit_sock = NULL; } - RCU_INIT_POINTER(aunet->nlsk, NULL); - synchronize_net(); netlink_kernel_release(sock); + aunet->nlsk = NULL; } static struct pernet_operations audit_net_ops __net_initdata = { diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index eed911d091da..1276474ac3cd 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,7 +1,8 @@ obj-y := core.o obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o ifeq ($(CONFIG_PERF_EVENTS),y) obj-$(CONFIG_BPF_SYSCALL) += stackmap.o endif +obj-$(CONFIG_CGROUP_BPF) += cgroup.o diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c new file mode 100644 index 000000000000..89b7ef41c86b --- /dev/null +++ b/kernel/bpf/bpf_lru_list.c @@ -0,0 +1,695 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/cpumask.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> + +#include "bpf_lru_list.h" + +#define LOCAL_FREE_TARGET (128) +#define LOCAL_NR_SCANS LOCAL_FREE_TARGET + +#define PERCPU_FREE_TARGET (16) +#define PERCPU_NR_SCANS PERCPU_FREE_TARGET + +/* Helpers to get the local list index */ +#define LOCAL_LIST_IDX(t) ((t) - BPF_LOCAL_LIST_T_OFFSET) +#define LOCAL_FREE_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE) +#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING) +#define IS_LOCAL_LIST_TYPE(t) ((t) >= BPF_LOCAL_LIST_T_OFFSET) + +static int get_next_cpu(int cpu) +{ + cpu = cpumask_next(cpu, cpu_possible_mask); + if (cpu >= nr_cpu_ids) + cpu = cpumask_first(cpu_possible_mask); + return cpu; +} + +/* Local list helpers */ +static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l) +{ + return &loc_l->lists[LOCAL_FREE_LIST_IDX]; +} + +static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l) +{ + return &loc_l->lists[LOCAL_PENDING_LIST_IDX]; +} + +/* bpf_lru_node helpers */ +static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node) +{ + return node->ref; +} + +static void bpf_lru_list_count_inc(struct bpf_lru_list *l, + enum bpf_lru_list_type type) +{ + if (type < NR_BPF_LRU_LIST_COUNT) + l->counts[type]++; +} + +static void bpf_lru_list_count_dec(struct bpf_lru_list *l, + enum bpf_lru_list_type type) +{ + if (type < NR_BPF_LRU_LIST_COUNT) + l->counts[type]--; +} + +static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l, + struct bpf_lru_node *node, + struct list_head *free_list, + enum bpf_lru_list_type tgt_free_type) +{ + if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) + return; + + /* If the removing node is the next_inactive_rotation candidate, + * move the next_inactive_rotation pointer also. + */ + if (&node->list == l->next_inactive_rotation) + l->next_inactive_rotation = l->next_inactive_rotation->prev; + + bpf_lru_list_count_dec(l, node->type); + + node->type = tgt_free_type; + list_move(&node->list, free_list); +} + +/* Move nodes from local list to the LRU list */ +static void __bpf_lru_node_move_in(struct bpf_lru_list *l, + struct bpf_lru_node *node, + enum bpf_lru_list_type tgt_type) +{ + if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) || + WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) + return; + + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; + node->ref = 0; + list_move(&node->list, &l->lists[tgt_type]); +} + +/* Move nodes between or within active and inactive list (like + * active to inactive, inactive to active or tail of active back to + * the head of active). + */ +static void __bpf_lru_node_move(struct bpf_lru_list *l, + struct bpf_lru_node *node, + enum bpf_lru_list_type tgt_type) +{ + if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) || + WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type))) + return; + + if (node->type != tgt_type) { + bpf_lru_list_count_dec(l, node->type); + bpf_lru_list_count_inc(l, tgt_type); + node->type = tgt_type; + } + node->ref = 0; + + /* If the moving node is the next_inactive_rotation candidate, + * move the next_inactive_rotation pointer also. + */ + if (&node->list == l->next_inactive_rotation) + l->next_inactive_rotation = l->next_inactive_rotation->prev; + + list_move(&node->list, &l->lists[tgt_type]); +} + +static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l) +{ + return l->counts[BPF_LRU_LIST_T_INACTIVE] < + l->counts[BPF_LRU_LIST_T_ACTIVE]; +} + +/* Rotate the active list: + * 1. Start from tail + * 2. If the node has the ref bit set, it will be rotated + * back to the head of active list with the ref bit cleared. + * Give this node one more chance to survive in the active list. + * 3. If the ref bit is not set, move it to the head of the + * inactive list. + * 4. It will at most scan nr_scans nodes + */ +static void __bpf_lru_list_rotate_active(struct bpf_lru *lru, + struct bpf_lru_list *l) +{ + struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE]; + struct bpf_lru_node *node, *tmp_node, *first_node; + unsigned int i = 0; + + first_node = list_first_entry(active, struct bpf_lru_node, list); + list_for_each_entry_safe_reverse(node, tmp_node, active, list) { + if (bpf_lru_node_is_ref(node)) + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); + else + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE); + + if (++i == lru->nr_scans || node == first_node) + break; + } +} + +/* Rotate the inactive list. It starts from the next_inactive_rotation + * 1. If the node has ref bit set, it will be moved to the head + * of active list with the ref bit cleared. + * 2. If the node does not have ref bit set, it will leave it + * at its current location (i.e. do nothing) so that it can + * be considered during the next inactive_shrink. + * 3. It will at most scan nr_scans nodes + */ +static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru, + struct bpf_lru_list *l) +{ + struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; + struct list_head *cur, *last, *next = inactive; + struct bpf_lru_node *node; + unsigned int i = 0; + + if (list_empty(inactive)) + return; + + last = l->next_inactive_rotation->next; + if (last == inactive) + last = last->next; + + cur = l->next_inactive_rotation; + while (i < lru->nr_scans) { + if (cur == inactive) { + cur = cur->prev; + continue; + } + + node = list_entry(cur, struct bpf_lru_node, list); + next = cur->prev; + if (bpf_lru_node_is_ref(node)) + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); + if (cur == last) + break; + cur = next; + i++; + } + + l->next_inactive_rotation = next; +} + +/* Shrink the inactive list. It starts from the tail of the + * inactive list and only move the nodes without the ref bit + * set to the designated free list. + */ +static unsigned int +__bpf_lru_list_shrink_inactive(struct bpf_lru *lru, + struct bpf_lru_list *l, + unsigned int tgt_nshrink, + struct list_head *free_list, + enum bpf_lru_list_type tgt_free_type) +{ + struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE]; + struct bpf_lru_node *node, *tmp_node, *first_node; + unsigned int nshrinked = 0; + unsigned int i = 0; + + first_node = list_first_entry(inactive, struct bpf_lru_node, list); + list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) { + if (bpf_lru_node_is_ref(node)) { + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE); + } else if (lru->del_from_htab(lru->del_arg, node)) { + __bpf_lru_node_move_to_free(l, node, free_list, + tgt_free_type); + if (++nshrinked == tgt_nshrink) + break; + } + + if (++i == lru->nr_scans) + break; + } + + return nshrinked; +} + +/* 1. Rotate the active list (if needed) + * 2. Always rotate the inactive list + */ +static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l) +{ + if (bpf_lru_list_inactive_low(l)) + __bpf_lru_list_rotate_active(lru, l); + + __bpf_lru_list_rotate_inactive(lru, l); +} + +/* Calls __bpf_lru_list_shrink_inactive() to shrink some + * ref-bit-cleared nodes and move them to the designated + * free list. + * + * If it cannot get a free node after calling + * __bpf_lru_list_shrink_inactive(). It will just remove + * one node from either inactive or active list without + * honoring the ref-bit. It prefers inactive list to active + * list in this situation. + */ +static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru, + struct bpf_lru_list *l, + unsigned int tgt_nshrink, + struct list_head *free_list, + enum bpf_lru_list_type tgt_free_type) + +{ + struct bpf_lru_node *node, *tmp_node; + struct list_head *force_shrink_list; + unsigned int nshrinked; + + nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink, + free_list, tgt_free_type); + if (nshrinked) + return nshrinked; + + /* Do a force shrink by ignoring the reference bit */ + if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE])) + force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE]; + else + force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE]; + + list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list, + list) { + if (lru->del_from_htab(lru->del_arg, node)) { + __bpf_lru_node_move_to_free(l, node, free_list, + tgt_free_type); + return 1; + } + } + + return 0; +} + +/* Flush the nodes from the local pending list to the LRU list */ +static void __local_list_flush(struct bpf_lru_list *l, + struct bpf_lru_locallist *loc_l) +{ + struct bpf_lru_node *node, *tmp_node; + + list_for_each_entry_safe_reverse(node, tmp_node, + local_pending_list(loc_l), list) { + if (bpf_lru_node_is_ref(node)) + __bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE); + else + __bpf_lru_node_move_in(l, node, + BPF_LRU_LIST_T_INACTIVE); + } +} + +static void bpf_lru_list_push_free(struct bpf_lru_list *l, + struct bpf_lru_node *node) +{ + unsigned long flags; + + if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type))) + return; + + raw_spin_lock_irqsave(&l->lock, flags); + __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE); + raw_spin_unlock_irqrestore(&l->lock, flags); +} + +static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru, + struct bpf_lru_locallist *loc_l) +{ + struct bpf_lru_list *l = &lru->common_lru.lru_list; + struct bpf_lru_node *node, *tmp_node; + unsigned int nfree = 0; + + raw_spin_lock(&l->lock); + + __local_list_flush(l, loc_l); + + __bpf_lru_list_rotate(lru, l); + + list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE], + list) { + __bpf_lru_node_move_to_free(l, node, local_free_list(loc_l), + BPF_LRU_LOCAL_LIST_T_FREE); + if (++nfree == LOCAL_FREE_TARGET) + break; + } + + if (nfree < LOCAL_FREE_TARGET) + __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree, + local_free_list(loc_l), + BPF_LRU_LOCAL_LIST_T_FREE); + |
