diff options
author | Edward Cree <ecree@solarflare.com> | 2017-08-07 15:26:19 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-08-08 17:51:34 -0700 |
commit | f1174f77b50c94eecaa658fdc56fa69b421de4b8 (patch) | |
tree | d6e11577190ab47f4c371da21cf1f0f14da66db0 | |
parent | e1cb90f2b83b6b48deeba0ac9f1920693cbad7e1 (diff) | |
download | linux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.tar.gz linux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.tar.bz2 linux-f1174f77b50c94eecaa658fdc56fa69b421de4b8.zip |
bpf/verifier: rework value tracking
Unifies adjusted and unadjusted register value types (e.g. FRAME_PTR is
now just a PTR_TO_STACK with zero offset).
Tracks value alignment by means of tracking known & unknown bits. This
also replaces the 'reg->imm' (leading zero bits) calculations for (what
were) UNKNOWN_VALUEs.
If pointer leaks are allowed, and adjust_ptr_min_max_vals returns -EACCES,
treat the pointer as an unknown scalar and try again, because we might be
able to conclude something about the result (e.g. pointer & 0x40 is either
0 or 0x40).
Verifier hooks in the netronome/nfp driver were changed to match the new
data structures.
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 24 | ||||
-rw-r--r-- | include/linux/bpf.h | 34 | ||||
-rw-r--r-- | include/linux/bpf_verifier.h | 34 | ||||
-rw-r--r-- | include/linux/tnum.h | 79 | ||||
-rw-r--r-- | kernel/bpf/Makefile | 2 | ||||
-rw-r--r-- | kernel/bpf/tnum.c | 164 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 1780 |
7 files changed, 1265 insertions, 852 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index d696ba46f70a..5b783a91b115 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -79,28 +79,32 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, const struct bpf_verifier_env *env) { const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + u64 imm; if (nfp_prog->act == NN_ACT_XDP) return 0; - if (reg0->type != CONST_IMM) { - pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, reg0->imm); + if (!(reg0->type == SCALAR_VALUE && tnum_is_const(reg0->var_off))) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); + pr_info("unsupported exit state: %d, var_off: %s\n", + reg0->type, tn_buf); return -EINVAL; } - if (nfp_prog->act != NN_ACT_DIRECT && - reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + imm = reg0->var_off.value; + if (nfp_prog->act != NN_ACT_DIRECT && imm != 0 && (imm & ~0U) != ~0U) { pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, reg0->imm); + reg0->type, imm); return -EINVAL; } - if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && - reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && - reg0->imm != TC_ACT_QUEUED) { + if (nfp_prog->act == NN_ACT_DIRECT && imm <= TC_ACT_REDIRECT && + imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && + imm != TC_ACT_QUEUED) { pr_info("unsupported exit state: %d, imm: %llx\n", - reg0->type, reg0->imm); + reg0->type, imm); return -EINVAL; } diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6353c7474dba..39229c455cba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -117,35 +117,25 @@ enum bpf_access_type { }; /* types of values stored in eBPF registers */ +/* Pointer types represent: + * pointer + * pointer + imm + * pointer + (u16) var + * pointer + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) 
means that some 'var' was added + * if (off > 0) means that 'imm' was added + */ enum bpf_reg_type { NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + SCALAR_VALUE, /* reg doesn't contain a valid pointer */ PTR_TO_CTX, /* reg points to bpf_context */ CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, + PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ - - /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map - * elem value. We only allow this if we can statically verify that - * access from this register are going to fall within the size of the - * map element. - */ - PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8e5d31f6faef..85936fa92d12 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -9,6 +9,7 @@ #include <linux/bpf.h> /* for enum bpf_reg_type */ #include <linux/filter.h> /* for MAX_BPF_STACK */ +#include <linux/tnum.h> /* Just some arbitrary values so we can safely do math without overflowing and * are obviously wrong for any sort of memory access. 
@@ -19,30 +20,37 @@ struct bpf_reg_state { enum bpf_reg_type type; union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u16 off; - u16 range; - }; + /* valid when type == PTR_TO_PACKET */ + u16 range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL */ struct bpf_map *map_ptr; }; + /* Fixed part of pointer offset, pointer types only */ + s32 off; + /* For PTR_TO_PACKET, used to find other pointers with the same variable + * offset, so they can share range knowledge. + * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we + * came from, when one is tested for != NULL. + */ u32 id; + /* These three fields must be last. See states_equal() */ + /* For scalar types (SCALAR_VALUE), this represents our knowledge of + * the actual value. + * For pointer types, this represents the variable part of the offset + * from the pointed-to object, and is shared with all bpf_reg_states + * with the same id as us. + */ + struct tnum var_off; /* Used to determine if any memory access using this register will - * result in a bad access. These two fields must be last. - * See states_equal() + * result in a bad access. + * These refer to the same value as var_off, not necessarily the actual + * contents of the register. */ s64 min_value; u64 max_value; - u32 min_align; - u32 aux_off; - u32 aux_off_align; bool value_from_signed; }; diff --git a/include/linux/tnum.h b/include/linux/tnum.h new file mode 100644 index 000000000000..a0b07bf1842b --- /dev/null +++ b/include/linux/tnum.h @@ -0,0 +1,79 @@ +/* tnum: tracked (or tristate) numbers + * + * A tnum tracks knowledge about the bits of a value. Each bit can be either + * known (0 or 1), or unknown (x). Arithmetic operations on tnums will + * propagate the unknown bits such that the tnum result represents all the + * possible results for possible values of the operands. 
+ */ +#include <linux/types.h> + +struct tnum { + u64 value; + u64 mask; +}; + +/* Constructors */ +/* Represent a known constant as a tnum. */ +struct tnum tnum_const(u64 value); +/* A completely unknown value */ +extern const struct tnum tnum_unknown; + +/* Arithmetic and logical ops */ +/* Shift a tnum left (by a fixed shift) */ +struct tnum tnum_lshift(struct tnum a, u8 shift); +/* Shift a tnum right (by a fixed shift) */ +struct tnum tnum_rshift(struct tnum a, u8 shift); +/* Add two tnums, return @a + @b */ +struct tnum tnum_add(struct tnum a, struct tnum b); +/* Subtract two tnums, return @a - @b */ +struct tnum tnum_sub(struct tnum a, struct tnum b); +/* Bitwise-AND, return @a & @b */ +struct tnum tnum_and(struct tnum a, struct tnum b); +/* Bitwise-OR, return @a | @b */ +struct tnum tnum_or(struct tnum a, struct tnum b); +/* Bitwise-XOR, return @a ^ @b */ +struct tnum tnum_xor(struct tnum a, struct tnum b); +/* Multiply two tnums, return @a * @b */ +struct tnum tnum_mul(struct tnum a, struct tnum b); + +/* Return a tnum representing numbers satisfying both @a and @b */ +struct tnum tnum_intersect(struct tnum a, struct tnum b); + +/* Return @a with all but the lowest @size bytes cleared */ +struct tnum tnum_cast(struct tnum a, u8 size); + +/* Returns true if @a is a known constant */ +static inline bool tnum_is_const(struct tnum a) +{ + return !a.mask; +} + +/* Returns true if @a == tnum_const(@b) */ +static inline bool tnum_equals_const(struct tnum a, u64 b) +{ + return tnum_is_const(a) && a.value == b; +} + +/* Returns true if @a is completely unknown */ +static inline bool tnum_is_unknown(struct tnum a) +{ + return !~a.mask; +} + +/* Returns true if @a is known to be a multiple of @size. + * @size must be a power of two. + */ +bool tnum_is_aligned(struct tnum a, u64 size); + +/* Returns true if @b represents a subset of @a. */ +bool tnum_in(struct tnum a, struct tnum b); + +/* Formatting functions. 
These have snprintf-like semantics: they will write + * up to @size bytes (including the terminating NUL byte), and return the number + * of bytes (excluding the terminating NUL) which would have been written had + * sufficient space been available. (Thus tnum_sbin always returns 64.) + */ +/* Format a tnum as a pair of hex numbers (value; mask) */ +int tnum_strn(char *str, size_t size, struct tnum a); +/* Format a tnum as tristate binary expansion */ +int tnum_sbin(char *str, size_t size, struct tnum a); diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 48e92705be59..2f0bcda40e90 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,6 +1,6 @@ obj-y := core.o -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o ifeq ($(CONFIG_NET),y) obj-$(CONFIG_BPF_SYSCALL) += devmap.o diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c new file mode 100644 index 000000000000..92eeeb1974a2 --- /dev/null +++ b/kernel/bpf/tnum.c @@ -0,0 +1,164 @@ +/* tnum: tracked (or tristate) numbers + * + * A tnum tracks knowledge about the bits of a value. Each bit can be either + * known (0 or 1), or unknown (x). Arithmetic operations on tnums will + * propagate the unknown bits such that the tnum result represents all the + * possible results for possible values of the operands. 
+ */ +#include <linux/kernel.h> +#include <linux/tnum.h> + +#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m} +/* A completely unknown value */ +const struct tnum tnum_unknown = { .value = 0, .mask = -1 }; + +struct tnum tnum_const(u64 value) +{ + return TNUM(value, 0); +} + +struct tnum tnum_lshift(struct tnum a, u8 shift) +{ + return TNUM(a.value << shift, a.mask << shift); +} + +struct tnum tnum_rshift(struct tnum a, u8 shift) +{ + return TNUM(a.value >> shift, a.mask >> shift); +} + +struct tnum tnum_add(struct tnum a, struct tnum b) +{ + u64 sm, sv, sigma, chi, mu; + + sm = a.mask + b.mask; + sv = a.value + b.value; + sigma = sm + sv; + chi = sigma ^ sv; + mu = chi | a.mask | b.mask; + return TNUM(sv & ~mu, mu); +} + +struct tnum tnum_sub(struct tnum a, struct tnum b) +{ + u64 dv, alpha, beta, chi, mu; + + dv = a.value - b.value; + alpha = dv + a.mask; + beta = dv - b.mask; + chi = alpha ^ beta; + mu = chi | a.mask | b.mask; + return TNUM(dv & ~mu, mu); +} + +struct tnum tnum_and(struct tnum a, struct tnum b) +{ + u64 alpha, beta, v; + + alpha = a.value | a.mask; + beta = b.value | b.mask; + v = a.value & b.value; + return TNUM(v, alpha & beta & ~v); +} + +struct tnum tnum_or(struct tnum a, struct tnum b) +{ + u64 v, mu; + + v = a.value | b.value; + mu = a.mask | b.mask; + return TNUM(v, mu & ~v); +} + +struct tnum tnum_xor(struct tnum a, struct tnum b) +{ + u64 v, mu; + + v = a.value ^ b.value; + mu = a.mask | b.mask; + return TNUM(v & ~mu, mu); +} + +/* half-multiply add: acc += (unknown * mask * value). + * An intermediate step in the multiply algorithm. 
+ */ +static struct tnum hma(struct tnum acc, u64 value, u64 mask) +{ + while (mask) { + if (mask & 1) + acc = tnum_add(acc, TNUM(0, value)); + mask >>= 1; + value <<= 1; + } + return acc; +} + +struct tnum tnum_mul(struct tnum a, struct tnum b) +{ + struct tnum acc; + u64 pi; + + pi = a.value * b.value; + acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value); + return hma(acc, b.mask, a.value); +} + +/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has + * a 'known 0' - this will return a 'known 1' for that bit. + */ +struct tnum tnum_intersect(struct tnum a, struct tnum b) +{ + u64 v, mu; + + v = a.value | b.value; + mu = a.mask & b.mask; + return TNUM(v & ~mu, mu); +} + +struct tnum tnum_cast(struct tnum a, u8 size) +{ + a.value &= (1ULL << (size * 8)) - 1; + a.mask &= (1ULL << (size * 8)) - 1; + return a; +} + +bool tnum_is_aligned(struct tnum a, u64 size) +{ + if (!size) + return true; + return !((a.value | a.mask) & (size - 1)); +} + +bool tnum_in(struct tnum a, struct tnum b) +{ + if (b.mask & ~a.mask) + return false; + b.value &= ~a.mask; + return a.value == b.value; +} + +int tnum_strn(char *str, size_t size, struct tnum a) +{ + return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask); +} +EXPORT_SYMBOL_GPL(tnum_strn); + +int tnum_sbin(char *str, size_t size, struct tnum a) +{ + size_t n; + + for (n = 64; n; n--) { + if (n < size) { + if (a.mask & 1) + str[n - 1] = 'x'; + else if (a.value & 1) + str[n - 1] = '1'; + else + str[n - 1] = '0'; + } + a.mask >>= 1; + a.value >>= 1; + } + str[min(size - 1, (size_t)64)] = 0; + return 64; +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f6e8b3887eab..c3f88b466c30 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -61,12 +61,12 @@ * (and -20 constant is saved for further stack bounds checking). * Meaning that this reg is a pointer to stack plus known immediate constant. 
* - * Most of the time the registers have UNKNOWN_VALUE type, which + * Most of the time the registers have SCALAR_VALUE type, which * means the register has some value, but it's not a valid pointer. - * (like pointer plus pointer becomes UNKNOWN_VALUE type) + * (like pointer plus pointer becomes SCALAR_VALUE type) * * When verifier sees load or store instructions the type of base register - * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer + * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer * types recognized by check_mem_access() function. * * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' @@ -180,15 +180,12 @@ static __printf(1, 2) void verbose(const char *fmt, ...) /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", - [UNKNOWN_VALUE] = "inv", + [SCALAR_VALUE] = "inv", [PTR_TO_CTX] = "ctx", [CONST_PTR_TO_MAP] = "map_ptr", [PTR_TO_MAP_VALUE] = "map_value", [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", - [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj", - [FRAME_PTR] = "fp", [PTR_TO_STACK] = "fp", - [CONST_IMM] = "imm", [PTR_TO_PACKET] = "pkt", [PTR_TO_PACKET_END] = "pkt_end", }; @@ -221,32 +218,36 @@ static void print_verifier_state(struct bpf_verifier_state *state) if (t == NOT_INIT) continue; verbose(" R%d=%s", i, reg_type_str[t]); - if (t == CONST_IMM || t == PTR_TO_STACK) - verbose("%lld", reg->imm); - else if (t == PTR_TO_PACKET) - verbose("(id=%d,off=%d,r=%d)", - reg->id, reg->off, reg->range); - else if (t == UNKNOWN_VALUE && reg->imm) - verbose("%lld", reg->imm); - else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL || - t == PTR_TO_MAP_VALUE_ADJ) - verbose("(ks=%d,vs=%d,id=%u)", - reg->map_ptr->key_size, - reg->map_ptr->value_size, - reg->id); - if (reg->min_value != BPF_REGISTER_MIN_RANGE) - verbose(",min_value=%lld", - (long long)reg->min_value); - if (reg->max_value != 
BPF_REGISTER_MAX_RANGE) - verbose(",max_value=%llu", - (unsigned long long)reg->max_value); - if (reg->min_align) - verbose(",min_align=%u", reg->min_align); - if (reg->aux_off) - verbose(",aux_off=%u", reg->aux_off); - if (reg->aux_off_align) - verbose(",aux_off_align=%u", reg->aux_off_align); + if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && + tnum_is_const(reg->var_off)) { + /* reg->off should be 0 for SCALAR_VALUE */ + verbose("%lld", reg->var_off.value + reg->off); + } else { + verbose("(id=%d", reg->id); + if (t != SCALAR_VALUE) + verbose(",off=%d", reg->off); + if (t == PTR_TO_PACKET) + verbose(",r=%d", reg->range); + else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_VALUE || + t == PTR_TO_MAP_VALUE_OR_NULL) + verbose(",ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); + if (reg->min_value != BPF_REGISTER_MIN_RANGE) + verbose(",min_value=%lld", + (long long)reg->min_value); + if (reg->max_value != BPF_REGISTER_MAX_RANGE) + verbose(",max_value=%llu", + (unsigned long long)reg->max_value); + if (!tnum_is_unknown(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(",var_off=%s", tn_buf); + } + verbose(")"); + } } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -463,14 +464,69 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +static void __mark_reg_not_init(struct bpf_reg_state *reg); + +/* Mark the 'variable offset' part of a register as zero. This should be + * used only on registers holding a pointer type. 
+ */ +static void __mark_reg_known_zero(struct bpf_reg_state *reg) { - BUG_ON(regno >= MAX_BPF_REG); + reg->var_off = tnum_const(0); + reg->min_value = 0; + reg->max_value = 0; +} - memset(®s[regno], 0, sizeof(regs[regno])); - regs[regno].type = NOT_INIT; - regs[regno].min_value = BPF_REGISTER_MIN_RANGE; - regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno) +{ + if (WARN_ON(regno >= MAX_BPF_REG)) { + verbose("mark_reg_known_zero(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs */ + for (regno = 0; regno < MAX_BPF_REG; regno++) + __mark_reg_not_init(regs + regno); + return; + } + __mark_reg_known_zero(regs + regno); +} + +/* Mark a register as having a completely unknown (scalar) value. */ +static void __mark_reg_unknown(struct bpf_reg_state *reg) +{ + reg->type = SCALAR_VALUE; + reg->id = 0; + reg->off = 0; + reg->var_off = tnum_unknown; + reg->min_value = BPF_REGISTER_MIN_RANGE; + reg->max_value = BPF_REGISTER_MAX_RANGE; +} + +static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno) +{ + if (WARN_ON(regno >= MAX_BPF_REG)) { + verbose("mark_reg_unknown(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs */ + for (regno = 0; regno < MAX_BPF_REG; regno++) + __mark_reg_not_init(regs + regno); + return; + } + __mark_reg_unknown(regs + regno); +} + +static void __mark_reg_not_init(struct bpf_reg_state *reg) +{ + __mark_reg_unknown(reg); + reg->type = NOT_INIT; +} + +static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno) +{ + if (WARN_ON(regno >= MAX_BPF_REG)) { + verbose("mark_reg_not_init(regs, %u)\n", regno); + /* Something bad happened, let's kill all regs */ + for (regno = 0; regno < MAX_BPF_REG; regno++) + __mark_reg_not_init(regs + regno); + return; + } + __mark_reg_not_init(regs + regno); } static void init_reg_state(struct bpf_reg_state *regs) @@ -481,23 +537,12 @@ static void init_reg_state(struct bpf_reg_state *regs) 
mark_reg_not_init(regs, i); /* frame pointer */ - regs[BPF_REG_FP].type = FRAME_PTR; + regs[BPF_REG_FP].type = PTR_TO_STACK; + mark_reg_known_zero(regs, BPF_REG_FP); /* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; -} - -static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) -{ - regs[regno].type = UNKNOWN_VALUE; - regs[regno].id = 0; - regs[regno].imm = 0; -} - -static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) -{ - BUG_ON(regno >= MAX_BPF_REG); - __mark_reg_unknown_value(regs, regno); + mark_reg_known_zero(regs, BPF_REG_1); } static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) @@ -505,14 +550,6 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; regs[regno].value_from_signed = false; - regs[regno].min_align = 0; -} - -static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs, - u32 regno) -{ - mark_reg_unknown_value(regs, regno); - reset_reg_range_values(regs, regno); } enum reg_arg_type { @@ -542,7 +579,7 @@ static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, return -EACCES; } if (t == DST_OP) - mark_reg_unknown_value(regs, regno); + mark_reg_unknown(regs, regno); } return 0; } @@ -552,12 +589,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type) switch (type) { case PTR_TO_MAP_VALUE: case PTR_TO_MAP_VALUE_OR_NULL: - case PTR_TO_MAP_VALUE_ADJ: case PTR_TO_STACK: case PTR_TO_CTX: case PTR_TO_PACKET: case PTR_TO_PACKET_END: - case FRAME_PTR: case CONST_PTR_TO_MAP: return true; default: @@ -637,14 +672,13 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, } if (value_regno >= 0) /* have read misc data from the stack */ - mark_reg_unknown_value_and_range(state->regs, - value_regno); + mark_reg_unknown(state->regs, value_regno); return 0; } } /* check read/write into map element returned by 
bpf_map_lookup_elem() */ -static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, +static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -657,22 +691,25 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, return 0; } -/* check read/write into an adjusted map element */ -static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno, +/* check read/write into a map element with possible variable offset */ +static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_verifier_state *state = &env->cur_state; struct bpf_reg_state *reg = &state->regs[regno]; int err; - /* We adjusted the register to this map value, so we - * need to change off and size to min_value and max_value - * respectively to make sure our theoretical access will be - * safe. + /* We may have adjusted the register to this map value, so we + * need to try adding each of min_value and max_value to off + * to make sure our theoretical access will be safe. */ if (log_level) print_verifier_state(state); - env->varlen_map_value_access = true; + /* If the offset is variable, we will need to be stricter in state + * pruning from now on. + */ + if (!tnum_is_const(reg->var_off)) + env->varlen_map_value_access = true; /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. 
If we are using signed variables for our @@ -684,10 +721,9 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno, regno); return -EACCES; } - err = check_map_access(env, regno, reg->min_value + off, size); + err = __check_map_access(env, regno, reg->min_value + off, size); if (err) { - verbose("R%d min value is outside of the array range\n", - regno); + verbose("R%d min value is outside of the array range\n", regno); return err; } @@ -699,7 +735,10 @@ static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno, regno); return -EACCES; } - return check_map_access(env, regno, reg->max_value + off, size); + err = __check_map_access(env, regno, reg->max_value + off, size); + if (err) + verbose("R%d max value is outside of the array range\n", regno); + return err; } #define MAX_PACKET_OFF 0xffff @@ -729,14 +768,13 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, } } -static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, - int size) +static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, + int off, int size) { struct bpf_reg_state *regs = env->cur_state.regs; struct bpf_reg_state *reg = ®s[regno]; - off += reg->off; - if (off < 0 || size <= 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || (u64)off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; @@ -744,7 +782,35 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, return 0; } -/* check access to 'struct bpf_context' fields */ +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, + int size) +{ + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = ®s[regno]; + int err; + + /* We may have added a variable offset to the packet pointer; but any + * reg->range we have comes after that. 
We are only checking the fixed + * offset. + */ + + /* We don't allow negative numbers, because we aren't tracking enough + * detail to prove they're safe. + */ + if (reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + err = __check_packet_access(env, regno, off, size); + if (err) { + verbose("R%d offset is outside of the packet\n", regno); + return err; + } + return err; +} + +/* check access to 'struct bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { @@ -784,13 +850,7 @@ static bool __is_pointer_value(bool allow_ptr_leaks, if (allow_ptr_leaks) return false; - switch (reg->type) { - case UNKNOWN_VALUE: - case CONST_IMM: - return false; - default: - return true; - } + return reg->type != SCALAR_VALUE; } static bool is_pointer_value(struct bpf_verifier_env *env, int regno) @@ -801,23 +861,13 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { + struct tnum reg_off; int ip_align; - int reg_off; /* Byte size accesses are always allowed. */ if (!strict || size == 1) return 0; - reg_off = reg->off; - if (reg->id) { - if (reg->aux_off_align % size) { - verbose("Packet access is only %u byte aligned, %d byte access not allowed\n", - reg->aux_off_align, size); - return -EACCES; - } - reg_off += reg->aux_off; - } - /* For platforms that do not have a Kconfig enabling * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of * NET_IP_ALIGN is universally set to '2'. And on platforms @@ -827,20 +877,37 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, * unconditional IP align value of '2'. 
*/ ip_align = 2; - if ((ip_align + reg_off + off) % size != 0) { - verbose("misaligned packet access off %d+%d+%d size %d\n", - ip_align, reg_off, off, size); + + reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off)); + if (!tnum_is_aligned(reg_off, size)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("misaligned packet access off %d+%s+%d+%d size %d\n", + ip_align, tn_buf, reg->off, off, size); return -EACCES; } return 0; } -static int check_val_ptr_alignment(const struct bpf_reg_state *reg, - int size, bool strict) +static int check_generic_ptr_alignment(const struct bpf_reg_state *reg, + const char *pointer_desc, + int off, int size, bool strict) { - if (strict && size != 1) { - verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); + struct tnum reg_off; + + /* Byte size accesses are always allowed. */ + if (!strict || size == 1) + return 0; + + reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off)); + if (!tnum_is_aligned(reg_off, size)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("misaligned %saccess off %s+%d+%d size %d\n", + pointer_desc, tn_buf, reg->off, off, size); return -EACCES; } @@ -852,21 +919,25 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, int off, int size) { bool strict = env->strict_alignment; + const char *pointer_desc = ""; switch (reg->type) { case PTR_TO_PACKET: + /* special case, because of NET_IP_ALIGN */ return check_pkt_ptr_alignment(reg, off, size, strict); - case PTR_TO_MAP_VALUE_ADJ: - return check_val_ptr_alignment(reg, size, strict); + case PTR_TO_MAP_VALUE: + pointer_desc = "value "; + break; + case PTR_TO_CTX: + pointer_desc = "context "; + break; + case PTR_TO_STACK: + pointer_desc = "stack "; + break; default: - if (off % size != 0) { - verbose("misaligned access off %d size %d\n", - off, size); - return -EACCES; - } - - return 0; + break; } + return check_generic_ptr_alignment(reg, 
pointer_desc, off, size, strict); } /* check whether memory at (regno + off) is accessible for t = (read | write) @@ -883,52 +954,79 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; - if (reg->type == PTR_TO_STACK) - off += reg->imm; - size = bpf_size_to_bytes(bpf_size); if (size < 0) return size; + /* alignment checks will add in reg->off themselves */ err = check_ptr_alignment(env, reg, off, size); if (err) return err; - if (reg->type == PTR_TO_MAP_VALUE || - reg->type == PTR_TO_MAP_VALUE_ADJ) { + /* for access checks, reg->off is just part of off */ + off += reg->off; + + if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into map\n", value_regno); return -EACCES; } - if (reg->type == PTR_TO_MAP_VALUE_ADJ) - err = check_map_access_adj(env, regno, off, size); - else - err = check_map_access(env, regno, off, size); + err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown_value_and_range(state->regs, - value_regno); + mark_reg_unknown(state->regs, value_regno); } else if (reg->type == PTR_TO_CTX) { - enum bpf_reg_type reg_type = UNKNOWN_VALUE; + enum bpf_reg_type reg_type = SCALAR_VALUE; if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } + /* ctx accesses must be at a fixed offset, so that we can + * determine what type of data were returned. 
+ */ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("variable ctx access var_off=%s off=%d size=%d", + tn_buf, off, size); + return -EACCES; + } + off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { - mark_reg_unknown_value_and_range(state->regs, - value_regno); - /* note that reg.[id|off|range] == 0 */ + /* ctx access returns either a scalar, or a + * PTR_TO_PACKET[_END]. In the latter case, we know + * the offset is zero. + */ + if (reg_type == SCALAR_VALUE) + mark_reg_unknown(state->regs, value_regno); + else + mark_reg_known_zero(state->regs, value_regno); + state->regs[value_regno].id = 0; + state->regs[value_regno].off = 0; + state->regs[value_regno].range = 0; state->regs[value_regno].type = reg_type; - state->regs[value_regno].aux_off = 0; - state->regs[value_regno].aux_off_align = 0; } - } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STA |