summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEdward Cree <ecree@solarflare.com>2017-08-07 15:26:19 +0100
committerDavid S. Miller <davem@davemloft.net>2017-08-08 17:51:34 -0700
commitf1174f77b50c94eecaa658fdc56fa69b421de4b8 (patch)
treed6e11577190ab47f4c371da21cf1f0f14da66db0
parente1cb90f2b83b6b48deeba0ac9f1920693cbad7e1 (diff)
downloadlinux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.tar.gz
linux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.tar.bz2
linux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.zip
bpf/verifier: rework value tracking
Unifies adjusted and unadjusted register value types (e.g. FRAME_POINTER is now just a PTR_TO_STACK with zero offset). Tracks value alignment by means of tracking known & unknown bits. This also replaces the 'reg->imm' (leading zero bits) calculations for (what were) UNKNOWN_VALUEs. If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES, treat the pointer as an unknown scalar and try again, because we might be able to conclude something about the result (e.g. pointer & 0x40 is either 0 or 0x40). Verifier hooks in the netronome/nfp driver were changed to match the new data structures. Signed-off-by: Edward Cree <ecree@solarflare.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c24
-rw-r--r--include/linux/bpf.h34
-rw-r--r--include/linux/bpf_verifier.h34
-rw-r--r--include/linux/tnum.h79
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/tnum.c164
-rw-r--r--kernel/bpf/verifier.c1780
7 files changed, 1265 insertions, 852 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index d696ba46f70a..5b783a91b115 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -79,28 +79,32 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
const struct bpf_verifier_env *env)
{
const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+ u64 imm;
if (nfp_prog->act == NN_ACT_XDP)
return 0;
- if (reg0->type != CONST_IMM) {
- pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off);
+ pr_info("unsupported exit state: %d, var_off: %s\n",
+ reg0->type, tn_buf);
return -EINVAL;
}
- if (nfp_prog->act != NN_ACT_DIRECT &&
- reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+ imm = reg0->var_off.value;
+ if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) {
pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ reg0->type, imm);
return -EINVAL;
}
- if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
- reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
- reg0->imm != TC_ACT_QUEUED) {
+ if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT &&
+ imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN &&
+ imm != TC_ACT_QUEUED) {
pr_info("unsupported exit state: %d, imm: %llx\n",
- reg0->type, reg0->imm);
+ reg0->type, imm);
return -EINVAL;
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6353c7474dba..39229c455cba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -117,35 +117,25 @@ enum bpf_access_type {
};
/* types of values stored in eBPF registers */
+/* Pointer types represent:
+ * pointer
+ * pointer + imm
+ * pointer + (u16) var
+ * pointer + (u16) var + imm
+ * if (range > 0) then [ptr, ptr + range - off) is safe to access
+ * if (id > 0) means that some 'var' was added
+ * if (off > 0) means that 'imm' was added
+ */
enum bpf_reg_type {
NOT_INIT = 0, /* nothing was written into register */
- UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */
+ SCALAR_VALUE, /* reg doesn't contain a valid pointer */
PTR_TO_CTX, /* reg points to bpf_context */
CONST_PTR_TO_MAP, /* reg points to struct bpf_map */
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
- FRAME_PTR, /* reg == frame_pointer */
- PTR_TO_STACK, /* reg == frame_pointer + imm */
- CONST_IMM, /* constant integer value */
-
- /* PTR_TO_PACKET represents:
- * skb->data
- * skb->data + imm
- * skb->data + (u16) var
- * skb->data + (u16) var + imm
- * if (range > 0) then [ptr, ptr + range - off) is safe to access
- * if (id > 0) means that some 'var' was added
- * if (off > 0) menas that 'imm' was added
- */
- PTR_TO_PACKET,
+ PTR_TO_STACK, /* reg == frame_pointer + offset */
+ PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
-
- /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
- * elem value. We only allow this if we can statically verify that
- * access from this register are going to fall within the size of the
- * map element.
- */
- PTR_TO_MAP_VALUE_ADJ,
};
struct bpf_prog;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 8e5d31f6faef..85936fa92d12 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -9,6 +9,7 @@
#include <linux/bpf.h> /* for enum bpf_reg_type */
#include <linux/filter.h> /* for MAX_BPF_STACK */
+#include <linux/tnum.h>
/* Just some arbitrary values so we can safely do math without overflowing and
* are obviously wrong for any sort of memory access.
@@ -19,30 +20,37 @@
struct bpf_reg_state {
enum bpf_reg_type type;
union {
- /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
- s64 imm;
-
- /* valid when type == PTR_TO_PACKET* */
- struct {
- u16 off;
- u16 range;
- };
+ /* valid when type == PTR_TO_PACKET */
+ u16 range;
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
* PTR_TO_MAP_VALUE_OR_NULL
*/
struct bpf_map *map_ptr;
};
+ /* Fixed part of pointer offset, pointer types only */
+ s32 off;
+ /* For PTR_TO_PACKET, used to find other pointers with the same variable
+ * offset, so they can share range knowledge.
+ * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
+ * came from, when one is tested for != NULL.
+ */
u32 id;
+ /* These three fields must be last. See states_equal() */
+ /* For scalar types (SCALAR_VALUE), this represents our knowledge of
+ * the actual value.
+ * For pointer types, this represents the variable part of the offset
+ * from the pointed-to object, and is shared with all bpf_reg_states
+ * with the same id as us.
+ */
+ struct tnum var_off;
/* Used to determine if any memory access using this register will
- * result in a bad access. These two fields must be last.
- * See states_equal()
+ * result in a bad access.
+ * These refer to the same value as var_off, not necessarily the actual
+ * contents of the register.
*/
s64 min_value;
u64 max_value;
- u32 min_align;
- u32 aux_off;
- u32 aux_off_align;
bool value_from_signed;
};
diff --git a/include/linux/tnum.h b/include/linux/tnum.h
new file mode 100644
index 000000000000..a0b07bf1842b
--- /dev/null
+++ b/include/linux/tnum.h
@@ -0,0 +1,79 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value. Each bit can be either
+ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/types.h>
+
+struct tnum {
+ u64 value;
+ u64 mask;
+};
+
+/* Constructors */
+/* Represent a known constant as a tnum. */
+struct tnum tnum_const(u64 value);
+/* A completely unknown value */
+extern const struct tnum tnum_unknown;
+
+/* Arithmetic and logical ops */
+/* Shift a tnum left (by a fixed shift) */
+struct tnum tnum_lshift(struct tnum a, u8 shift);
+/* Shift a tnum right (by a fixed shift) */
+struct tnum tnum_rshift(struct tnum a, u8 shift);
+/* Add two tnums, return @a + @b */
+struct tnum tnum_add(struct tnum a, struct tnum b);
+/* Subtract two tnums, return @a - @b */
+struct tnum tnum_sub(struct tnum a, struct tnum b);
+/* Bitwise-AND, return @a & @b */
+struct tnum tnum_and(struct tnum a, struct tnum b);
+/* Bitwise-OR, return @a | @b */
+struct tnum tnum_or(struct tnum a, struct tnum b);
+/* Bitwise-XOR, return @a ^ @b */
+struct tnum tnum_xor(struct tnum a, struct tnum b);
+/* Multiply two tnums, return @a * @b */
+struct tnum tnum_mul(struct tnum a, struct tnum b);
+
+/* Return a tnum representing numbers satisfying both @a and @b */
+struct tnum tnum_intersect(struct tnum a, struct tnum b);
+
+/* Return @a with all but the lowest @size bytes cleared */
+struct tnum tnum_cast(struct tnum a, u8 size);
+
+/* Returns true if @a is a known constant */
+static inline bool tnum_is_const(struct tnum a)
+{
+ return !a.mask;
+}
+
+/* Returns true if @a == tnum_const(@b) */
+static inline bool tnum_equals_const(struct tnum a, u64 b)
+{
+ return tnum_is_const(a) && a.value == b;
+}
+
+/* Returns true if @a is completely unknown */
+static inline bool tnum_is_unknown(struct tnum a)
+{
+ return !~a.mask;
+}
+
+/* Returns true if @a is known to be a multiple of @size.
+ * @size must be a power of two.
+ */
+bool tnum_is_aligned(struct tnum a, u64 size);
+
+/* Returns true if @b represents a subset of @a. */
+bool tnum_in(struct tnum a, struct tnum b);
+
+/* Formatting functions. These have snprintf-like semantics: they will write
+ * up to @size bytes (including the terminating NUL byte), and return the number
+ * of bytes (excluding the terminating NUL) which would have been written had
+ * sufficient space been available. (Thus tnum_sbin always returns 64.)
+ */
+/* Format a tnum as a pair of hex numbers (value; mask) */
+int tnum_strn(char *str, size_t size, struct tnum a);
+/* Format a tnum as tristate binary expansion */
+int tnum_sbin(char *str, size_t size, struct tnum a);
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 48e92705be59..2f0bcda40e90 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,6 +1,6 @@
obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
new file mode 100644
index 000000000000..92eeeb1974a2
--- /dev/null
+++ b/kernel/bpf/tnum.c
@@ -0,0 +1,164 @@
+/* tnum: tracked (or tristate) numbers
+ *
+ * A tnum tracks knowledge about the bits of a value. Each bit can be either
+ * known (0 or 1), or unknown (x). Arithmetic operations on tnums will
+ * propagate the unknown bits such that the tnum result represents all the
+ * possible results for possible values of the operands.
+ */
+#include <linux/kernel.h>
+#include <linux/tnum.h>
+
+#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m}
+/* A completely unknown value */
+const struct tnum tnum_unknown = { .value = 0, .mask = -1 };
+
+struct tnum tnum_const(u64 value)
+{
+ return TNUM(value, 0);
+}
+
+struct tnum tnum_lshift(struct tnum a, u8 shift)
+{
+ return TNUM(a.value << shift, a.mask << shift);
+}
+
+struct tnum tnum_rshift(struct tnum a, u8 shift)
+{
+ return TNUM(a.value >> shift, a.mask >> shift);
+}
+
+struct tnum tnum_add(struct tnum a, struct tnum b)
+{
+ u64 sm, sv, sigma, chi, mu;
+
+ sm = a.mask + b.mask;
+ sv = a.value + b.value;
+ sigma = sm + sv;
+ chi = sigma ^ sv;
+ mu = chi | a.mask | b.mask;
+ return TNUM(sv & ~mu, mu);
+}
+
+struct tnum tnum_sub(struct tnum a, struct tnum b)
+{
+ u64 dv, alpha, beta, chi, mu;
+
+ dv = a.value - b.value;
+ alpha = dv + a.mask;
+ beta = dv - b.mask;
+ chi = alpha ^ beta;
+ mu = chi | a.mask | b.mask;
+ return TNUM(dv & ~mu, mu);
+}
+
+struct tnum tnum_and(struct tnum a, struct tnum b)
+{
+ u64 alpha, beta, v;
+
+ alpha = a.value | a.mask;
+ beta = b.value | b.mask;
+ v = a.value & b.value;
+ return TNUM(v, alpha & beta & ~v);
+}
+
+struct tnum tnum_or(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value | b.value;
+ mu = a.mask | b.mask;
+ return TNUM(v, mu & ~v);
+}
+
+struct tnum tnum_xor(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value ^ b.value;
+ mu = a.mask | b.mask;
+ return TNUM(v & ~mu, mu);
+}
+
+/* half-multiply add: acc += (unknown * mask * value).
+ * An intermediate step in the multiply algorithm.
+ */
+static struct tnum hma(struct tnum acc, u64 value, u64 mask)
+{
+ while (mask) {
+ if (mask & 1)
+ acc = tnum_add(acc, TNUM(0, value));
+ mask >>= 1;
+ value <<= 1;
+ }
+ return acc;
+}
+
+struct tnum tnum_mul(struct tnum a, struct tnum b)
+{
+ struct tnum acc;
+ u64 pi;
+
+ pi = a.value * b.value;
+ acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
+ return hma(acc, b.mask, a.value);
+}
+
+/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
+ * a 'known 0' - this will return a 'known 1' for that bit.
+ */
+struct tnum tnum_intersect(struct tnum a, struct tnum b)
+{
+ u64 v, mu;
+
+ v = a.value | b.value;
+ mu = a.mask & b.mask;
+ return TNUM(v & ~mu, mu);
+}
+
+struct tnum tnum_cast(struct tnum a, u8 size)
+{
+ a.value &= (1ULL << (size * 8)) - 1;
+ a.mask &= (1ULL << (size * 8)) - 1;
+ return a;
+}
+
+bool tnum_is_aligned(struct tnum a, u64 size)
+{
+ if (!size)
+ return true;
+ return !((a.value | a.mask) & (size - 1));
+}
+
+bool tnum_in(struct tnum a, struct tnum b)
+{
+ if (b.mask & ~a.mask)
+ return false;
+ b.value &= ~a.mask;
+ return a.value == b.value;
+}
+
+int tnum_strn(char *str, size_t size, struct tnum a)
+{
+ return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
+}
+EXPORT_SYMBOL_GPL(tnum_strn);
+
+int tnum_sbin(char *str, size_t size, struct tnum a)
+{
+ size_t n;
+
+ for (n = 64; n; n--) {
+ if (n < size) {
+ if (a.mask & 1)
+ str[n - 1] = 'x';
+ else if (a.value & 1)
+ str[n - 1] = '1';
+ else
+ str[n - 1] = '0';
+ }
+ a.mask >>= 1;
+ a.value >>= 1;
+ }
+ str[min(size - 1, (size_t)64)] = 0;
+ return 64;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f6e8b3887eab..c3f88b466c30 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -61,12 +61,12 @@
* (and -20 constant is saved for further stack bounds checking).
* Meaning that this reg is a pointer to stack plus known immediate constant.
*
- * Most of the time the registers have UNKNOWN_VALUE type, which
+ * Most of the time the registers have SCALAR_VALUE type, which
* means the register has some value, but it's not a valid pointer.
- * (like pointer plus pointer becomes UNKNOWN_VALUE type)
+ * (like pointer plus pointer becomes SCALAR_VALUE type)
*
* When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
* types recognized by check_mem_access() function.
*
* PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
@@ -180,15 +180,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...)
/* string representation of 'enum bpf_reg_type' */
static const char * const reg_type_str[] = {
[NOT_INIT] = "?",
- [UNKNOWN_VALUE] = "inv",
+ [SCALAR_VALUE] = "inv",
[PTR_TO_CTX] = "ctx",
[CONST_PTR_TO_MAP] = "map_ptr",
[PTR_TO_MAP_VALUE] = "map_value",
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
- [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj",
- [FRAME_PTR] = "fp",
[PTR_TO_STACK] = "fp",
- [CONST_IMM] = "imm",
[PTR_TO_PACKET] = "pkt",
[PTR_TO_PACKET_END] = "pkt_end",
};
@@ -221,32 +218,36 @@ static void print_verifier_state(struct bpf_verifier_state *state)
if (t == NOT_INIT)
continue;
verbose(" R%d=%s", i, reg_type_str[t]);
- if (t == CONST_IMM || t == PTR_TO_STACK)
- verbose("%lld", reg->imm);
- else if (t == PTR_TO_PACKET)
- verbose("(id=%d,off=%d,r=%d)",
- reg->id, reg->off, reg->range);
- else if (t == UNKNOWN_VALUE && reg->imm)
- verbose("%lld", reg->imm);
- else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
- t == PTR_TO_MAP_VALUE_OR_NULL ||
- t == PTR_TO_MAP_VALUE_ADJ)
- verbose("(ks=%d,vs=%d,id=%u)",
- reg->map_ptr->key_size,
- reg->map_ptr->value_size,
- reg->id);
- if (reg->min_value != BPF_REGISTER_MIN_RANGE)
- verbose(",min_value=%lld",
- (long long)reg->min_value);
- if (reg->max_value != BPF_REGISTER_MAX_RANGE)
- verbose(",max_value=%llu",
- (unsigned long long)reg->max_value);
- if (reg->min_align)
- verbose(",min_align=%u", reg->min_align);
- if (reg->aux_off)
- verbose(",aux_off=%u", reg->aux_off);
- if (reg->aux_off_align)
- verbose(",aux_off_align=%u", reg->aux_off_align);
+ if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
+ tnum_is_const(reg->var_off)) {
+ /* reg->off should be 0 for SCALAR_VALUE */
+ verbose("%lld", reg->var_off.value + reg->off);
+ } else {
+ verbose("(id=%d", reg->id);
+ if (t != SCALAR_VALUE)
+ verbose(",off=%d", reg->off);
+ if (t == PTR_TO_PACKET)
+ verbose(",r=%d", reg->range);
+ else if (t == CONST_PTR_TO_MAP ||
+ t == PTR_TO_MAP_VALUE ||
+ t == PTR_TO_MAP_VALUE_OR_NULL)
+ verbose(",ks=%d,vs=%d",
+ reg->map_ptr->key_size,
+ reg->map_ptr->value_size);
+ if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+ verbose(",min_value=%lld",
+ (long long)reg->min_value);
+ if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+ verbose(",max_value=%llu",
+ (unsigned long long)reg->max_value);
+ if (!tnum_is_unknown(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(",var_off=%s", tn_buf);
+ }
+ verbose(")");
+ }
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
@@ -463,14 +464,69 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+static void __mark_reg_not_init(struct bpf_reg_state *reg);
+
+/* Mark the 'variable offset' part of a register as zero. This should be
+ * used only on registers holding a pointer type.
+ */
+static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
- BUG_ON(regno >= MAX_BPF_REG);
+ reg->var_off = tnum_const(0);
+ reg->min_value = 0;
+ reg->max_value = 0;
+}
- memset(&regs[regno], 0, sizeof(regs[regno]));
- regs[regno].type = NOT_INIT;
- regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
- regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
+{
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_known_zero(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_known_zero(regs + regno);
+}
+
+/* Mark a register as having a completely unknown (scalar) value. */
+static void __mark_reg_unknown(struct bpf_reg_state *reg)
+{
+ reg->type = SCALAR_VALUE;
+ reg->id = 0;
+ reg->off = 0;
+ reg->var_off = tnum_unknown;
+ reg->min_value = BPF_REGISTER_MIN_RANGE;
+ reg->max_value = BPF_REGISTER_MAX_RANGE;
+}
+
+static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno)
+{
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_unknown(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_unknown(regs + regno);
+}
+
+static void __mark_reg_not_init(struct bpf_reg_state *reg)
+{
+ __mark_reg_unknown(reg);
+ reg->type = NOT_INIT;
+}
+
+static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+{
+ if (WARN_ON(regno >= MAX_BPF_REG)) {
+ verbose("mark_reg_not_init(regs, %u)\n", regno);
+ /* Something bad happened, let's kill all regs */
+ for (regno = 0; regno < MAX_BPF_REG; regno++)
+ __mark_reg_not_init(regs + regno);
+ return;
+ }
+ __mark_reg_not_init(regs + regno);
}
static void init_reg_state(struct bpf_reg_state *regs)
@@ -481,23 +537,12 @@ static void init_reg_state(struct bpf_reg_state *regs)
mark_reg_not_init(regs, i);
/* frame pointer */
- regs[BPF_REG_FP].type = FRAME_PTR;
+ regs[BPF_REG_FP].type = PTR_TO_STACK;
+ mark_reg_known_zero(regs, BPF_REG_FP);
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
-}
-
-static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
-{
- regs[regno].type = UNKNOWN_VALUE;
- regs[regno].id = 0;
- regs[regno].imm = 0;
-}
-
-static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
-{
- BUG_ON(regno >= MAX_BPF_REG);
- __mark_reg_unknown_value(regs, regno);
+ mark_reg_known_zero(regs, BPF_REG_1);
}
static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
@@ -505,14 +550,6 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
regs[regno].value_from_signed = false;
- regs[regno].min_align = 0;
-}
-
-static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
- u32 regno)
-{
- mark_reg_unknown_value(regs, regno);
- reset_reg_range_values(regs, regno);
}
enum reg_arg_type {
@@ -542,7 +579,7 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
return -EACCES;
}
if (t == DST_OP)
- mark_reg_unknown_value(regs, regno);
+ mark_reg_unknown(regs, regno);
}
return 0;
}
@@ -552,12 +589,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
switch (type) {
case PTR_TO_MAP_VALUE:
case PTR_TO_MAP_VALUE_OR_NULL:
- case PTR_TO_MAP_VALUE_ADJ:
case PTR_TO_STACK:
case PTR_TO_CTX:
case PTR_TO_PACKET:
case PTR_TO_PACKET_END:
- case FRAME_PTR:
case CONST_PTR_TO_MAP:
return true;
default:
@@ -637,14 +672,13 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
}
if (value_regno >= 0)
/* have read misc data from the stack */
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
+ mark_reg_unknown(state->regs, value_regno);
return 0;
}
}
/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
int size)
{
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -657,22 +691,25 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
return 0;
}
-/* check read/write into an adjusted map element */
-static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
+/* check read/write into a map element with possible variable offset */
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
int off, int size)
{
struct bpf_verifier_state *state = &env->cur_state;
struct bpf_reg_state *reg = &state->regs[regno];
int err;
- /* We adjusted the register to this map value, so we
- * need to change off and size to min_value and max_value
- * respectively to make sure our theoretical access will be
- * safe.
+ /* We may have adjusted the register to this map value, so we
+ * need to try adding each of min_value and max_value to off
+ * to make sure our theoretical access will be safe.
*/
if (log_level)
print_verifier_state(state);
- env->varlen_map_value_access = true;
+ /* If the offset is variable, we will need to be stricter in state
+ * pruning from now on.
+ */
+ if (!tnum_is_const(reg->var_off))
+ env->varlen_map_value_access = true;
/* The minimum value is only important with signed
* comparisons where we can't assume the floor of a
* value is 0. If we are using signed variables for our
@@ -684,10 +721,9 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
regno);
return -EACCES;
}
- err = check_map_access(env, regno, reg->min_value + off, size);
+ err = __check_map_access(env, regno, reg->min_value + off, size);
if (err) {
- verbose("R%d min value is outside of the array range\n",
- regno);
+ verbose("R%d min value is outside of the array range\n", regno);
return err;
}
@@ -699,7 +735,10 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
regno);
return -EACCES;
}
- return check_map_access(env, regno, reg->max_value + off, size);
+ err = __check_map_access(env, regno, reg->max_value + off, size);
+ if (err)
+ verbose("R%d max value is outside of the array range\n", regno);
+ return err;
}
#define MAX_PACKET_OFF 0xffff
@@ -729,14 +768,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
}
}
-static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
- int size)
+static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
+ int off, int size)
{
struct bpf_reg_state *regs = env->cur_state.regs;
struct bpf_reg_state *reg = &regs[regno];
- off += reg->off;
- if (off < 0 || size <= 0 || off + size > reg->range) {
+ if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, reg->off, reg->range);
return -EACCES;
@@ -744,7 +782,35 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
return 0;
}
-/* check access to 'struct bpf_context' fields */
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
+ int size)
+{
+ struct bpf_reg_state *regs = env->cur_state.regs;
+ struct bpf_reg_state *reg = &regs[regno];
+ int err;
+
+ /* We may have added a variable offset to the packet pointer; but any
+ * reg->range we have comes after that. We are only checking the fixed
+ * offset.
+ */
+
+ /* We don't allow negative numbers, because we aren't tracking enough
+ * detail to prove they're safe.
+ */
+ if (reg->min_value < 0) {
+ verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ regno);
+ return -EACCES;
+ }
+ err = __check_packet_access(env, regno, off, size);
+ if (err) {
+ verbose("R%d offset is outside of the packet\n", regno);
+ return err;
+ }
+ return err;
+}
+
+/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
enum bpf_access_type t, enum bpf_reg_type *reg_type)
{
@@ -784,13 +850,7 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
if (allow_ptr_leaks)
return false;
- switch (reg->type) {
- case UNKNOWN_VALUE:
- case CONST_IMM:
- return false;
- default:
- return true;
- }
+ return reg->type != SCALAR_VALUE;
}
static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
@@ -801,23 +861,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
int off, int size, bool strict)
{
+ struct tnum reg_off;
int ip_align;
- int reg_off;
/* Byte size accesses are always allowed. */
if (!strict || size == 1)
return 0;
- reg_off = reg->off;
- if (reg->id) {
- if (reg->aux_off_align % size) {
- verbose("Packet access is only %u byte aligned, %d byte access not allowed\n",
- reg->aux_off_align, size);
- return -EACCES;
- }
- reg_off += reg->aux_off;
- }
-
/* For platforms that do not have a Kconfig enabling
* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
* NET_IP_ALIGN is universally set to '2'. And on platforms
@@ -827,20 +877,37 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
* unconditional IP align value of '2'.
*/
ip_align = 2;
- if ((ip_align + reg_off + off) % size != 0) {
- verbose("misaligned packet access off %d+%d+%d size %d\n",
- ip_align, reg_off, off, size);
+
+ reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
+ if (!tnum_is_aligned(reg_off, size)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("misaligned packet access off %d+%s+%d+%d size %d\n",
+ ip_align, tn_buf, reg->off, off, size);
return -EACCES;
}
return 0;
}
-static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
- int size, bool strict)
+static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
+ const char *pointer_desc,
+ int off, int size, bool strict)
{
- if (strict && size != 1) {
- verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ struct tnum reg_off;
+
+ /* Byte size accesses are always allowed. */
+ if (!strict || size == 1)
+ return 0;
+
+ reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
+ if (!tnum_is_aligned(reg_off, size)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("misaligned %saccess off %s+%d+%d size %d\n",
+ pointer_desc, tn_buf, reg->off, off, size);
return -EACCES;
}
@@ -852,21 +919,25 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
int off, int size)
{
bool strict = env->strict_alignment;
+ const char *pointer_desc = "";
switch (reg->type) {
case PTR_TO_PACKET:
+ /* special case, because of NET_IP_ALIGN */
return check_pkt_ptr_alignment(reg, off, size, strict);
- case PTR_TO_MAP_VALUE_ADJ:
- return check_val_ptr_alignment(reg, size, strict);
+ case PTR_TO_MAP_VALUE:
+ pointer_desc = "value ";
+ break;
+ case PTR_TO_CTX:
+ pointer_desc = "context ";
+ break;
+ case PTR_TO_STACK:
+ pointer_desc = "stack ";
+ break;
default:
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- }
-
- return 0;
+ break;
}
+ return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -883,52 +954,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
struct bpf_reg_state *reg = &state->regs[regno];
int size, err = 0;
- if (reg->type == PTR_TO_STACK)
- off += reg->imm;
-
size = bpf_size_to_bytes(bpf_size);
if (size < 0)
return size;
+ /* alignment checks will add in reg->off themselves */
err = check_ptr_alignment(env, reg, off, size);
if (err)
return err;
- if (reg->type == PTR_TO_MAP_VALUE ||
- reg->type == PTR_TO_MAP_VALUE_ADJ) {
+ /* for access checks, reg->off is just part of off */
+ off += reg->off;
+
+ if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into map\n", value_regno);
return -EACCES;
}
- if (reg->type == PTR_TO_MAP_VALUE_ADJ)
- err = check_map_access_adj(env, regno, off, size);
- else
- err = check_map_access(env, regno, off, size);
+ err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
+ mark_reg_unknown(state->regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
- enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+ enum bpf_reg_type reg_type = SCALAR_VALUE;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose("R%d leaks addr into ctx\n", value_regno);
return -EACCES;
}
+ /* ctx accesses must be at a fixed offset, so that we can
+ * determine what type of data were returned.
+ */
+ if (!tnum_is_const(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose("variable ctx access var_off=%s off=%d size=%d",
+ tn_buf, off, size);
+ return -EACCES;
+ }
+ off += reg->var_off.value;
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
- mark_reg_unknown_value_and_range(state->regs,
- value_regno);
- /* note that reg.[id|off|range] == 0 */
+ /* ctx access returns either a scalar, or a
+ * PTR_TO_PACKET[_END]. In the latter case, we know
+ * the offset is zero.
+ */
+ if (reg_type == SCALAR_VALUE)
+ mark_reg_unknown(state->regs, value_regno);
+ else
+ mark_reg_known_zero(state->regs, value_regno);
+ state->regs[value_regno].id = 0;
+ state->regs[value_regno].off = 0;
+ state->regs[value_regno].range = 0;
state->regs[value_regno].type = reg_type;
- state->regs[value_regno].aux_off = 0;
- state->regs[value_regno].aux_off_align = 0;
}
- } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STA