diff options
author | Tiezhu Yang <yangtiezhu@loongson.cn> | 2022-10-12 16:36:20 +0800 |
---|---|---|
committer | Huacai Chen <chenhuacai@loongson.cn> | 2022-10-12 16:36:20 +0800 |
commit | 5dc615520c4dfb358245680f1904bad61116648e (patch) | |
tree | 4d12dbcf06e480d4290ffeaf364c8c8158586811 | |
parent | 4e59e5a46936dd649208f348ead678c35197203d (diff) | |
download | linux-5dc615520c4dfb358245680f1904bad61116648e.tar.gz linux-5dc615520c4dfb358245680f1904bad61116648e.tar.bz2 linux-5dc615520c4dfb358245680f1904bad61116648e.zip |
LoongArch: Add BPF JIT support
BPF programs are normally handled by a BPF interpreter, add BPF JIT
support for LoongArch to allow the kernel to generate native code when
a program is loaded into the kernel. This will significantly speed-up
processing of BPF programs.
Co-developed-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
-rw-r--r-- | arch/loongarch/Kbuild | 1 | ||||
-rw-r--r-- | arch/loongarch/Kconfig | 1 | ||||
-rw-r--r-- | arch/loongarch/include/asm/inst.h | 221 | ||||
-rw-r--r-- | arch/loongarch/include/uapi/asm/bpf_perf_event.h | 9 | ||||
-rw-r--r-- | arch/loongarch/net/Makefile | 7 | ||||
-rw-r--r-- | arch/loongarch/net/bpf_jit.c | 1179 | ||||
-rw-r--r-- | arch/loongarch/net/bpf_jit.h | 282 |
7 files changed, 1700 insertions, 0 deletions
diff --git a/arch/loongarch/Kbuild b/arch/loongarch/Kbuild index ab5373d0a24f..b01f5cdb27e0 100644 --- a/arch/loongarch/Kbuild +++ b/arch/loongarch/Kbuild @@ -1,5 +1,6 @@ obj-y += kernel/ obj-y += mm/ +obj-y += net/ obj-y += vdso/ # for cleaning diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 2837ac5413c0..5c7c2a7762b7 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -87,6 +87,7 @@ config LOONGARCH select HAVE_CONTEXT_TRACKING_USER select HAVE_DEBUG_STACKOVERFLOW select HAVE_DMA_CONTIGUOUS + select HAVE_EBPF_JIT select HAVE_EXIT_THREAD select HAVE_FAST_GUP select HAVE_GENERIC_VDSO diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h index 63df7efff276..fce1843ceebb 100644 --- a/arch/loongarch/include/asm/inst.h +++ b/arch/loongarch/include/asm/inst.h @@ -345,4 +345,225 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit) return val < (1UL << bit); } +#define DEF_EMIT_REG0I26_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + int offset) \ +{ \ + unsigned int immediate_l, immediate_h; \ + \ + immediate_l = offset & 0xffff; \ + offset >>= 16; \ + immediate_h = offset & 0x3ff; \ + \ + insn->reg0i26_format.opcode = OP; \ + insn->reg0i26_format.immediate_l = immediate_l; \ + insn->reg0i26_format.immediate_h = immediate_h; \ +} + +DEF_EMIT_REG0I26_FORMAT(b, b_op) + +#define DEF_EMIT_REG1I20_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, int imm) \ +{ \ + insn->reg1i20_format.opcode = OP; \ + insn->reg1i20_format.immediate = imm; \ + insn->reg1i20_format.rd = rd; \ +} + +DEF_EMIT_REG1I20_FORMAT(lu12iw, lu12iw_op) +DEF_EMIT_REG1I20_FORMAT(lu32id, lu32id_op) +DEF_EMIT_REG1I20_FORMAT(pcaddu18i, pcaddu18i_op) + +#define DEF_EMIT_REG2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj) \ +{ \ + insn->reg2_format.opcode = OP; \ + insn->reg2_format.rd = rd; \ + insn->reg2_format.rj = rj; \ +} + +DEF_EMIT_REG2_FORMAT(revb2h, revb2h_op) +DEF_EMIT_REG2_FORMAT(revb2w, revb2w_op) +DEF_EMIT_REG2_FORMAT(revbd, revbd_op) + +#define DEF_EMIT_REG2I5_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i5_format.opcode = OP; \ + insn->reg2i5_format.immediate = imm; \ + insn->reg2i5_format.rd = rd; \ + insn->reg2i5_format.rj = rj; \ +} + +DEF_EMIT_REG2I5_FORMAT(slliw, slliw_op) +DEF_EMIT_REG2I5_FORMAT(srliw, srliw_op) +DEF_EMIT_REG2I5_FORMAT(sraiw, sraiw_op) + +#define DEF_EMIT_REG2I6_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i6_format.opcode = OP; \ + insn->reg2i6_format.immediate = imm; \ + insn->reg2i6_format.rd = rd; \ + insn->reg2i6_format.rj = rj; \ +} + +DEF_EMIT_REG2I6_FORMAT(sllid, sllid_op) +DEF_EMIT_REG2I6_FORMAT(srlid, srlid_op) +DEF_EMIT_REG2I6_FORMAT(sraid, sraid_op) + +#define DEF_EMIT_REG2I12_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i12_format.opcode = OP; \ + insn->reg2i12_format.immediate = imm; \ + insn->reg2i12_format.rd = rd; \ + insn->reg2i12_format.rj = rj; \ +} + +DEF_EMIT_REG2I12_FORMAT(addiw, addiw_op) +DEF_EMIT_REG2I12_FORMAT(addid, addid_op) +DEF_EMIT_REG2I12_FORMAT(lu52id, lu52id_op) +DEF_EMIT_REG2I12_FORMAT(andi, andi_op) +DEF_EMIT_REG2I12_FORMAT(ori, ori_op) +DEF_EMIT_REG2I12_FORMAT(xori, xori_op) +DEF_EMIT_REG2I12_FORMAT(ldbu, ldbu_op) +DEF_EMIT_REG2I12_FORMAT(ldhu, ldhu_op) +DEF_EMIT_REG2I12_FORMAT(ldwu, ldwu_op) +DEF_EMIT_REG2I12_FORMAT(ldd, ldd_op) +DEF_EMIT_REG2I12_FORMAT(stb, stb_op) +DEF_EMIT_REG2I12_FORMAT(sth, sth_op) +DEF_EMIT_REG2I12_FORMAT(stw, stw_op) +DEF_EMIT_REG2I12_FORMAT(std, std_op) + +#define DEF_EMIT_REG2I14_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int imm) \ +{ \ + insn->reg2i14_format.opcode = OP; \ + insn->reg2i14_format.immediate = imm; \ + insn->reg2i14_format.rd = rd; \ + insn->reg2i14_format.rj = rj; \ +} + +DEF_EMIT_REG2I14_FORMAT(llw, llw_op) +DEF_EMIT_REG2I14_FORMAT(scw, scw_op) +DEF_EMIT_REG2I14_FORMAT(lld, lld_op) +DEF_EMIT_REG2I14_FORMAT(scd, scd_op) +DEF_EMIT_REG2I14_FORMAT(ldptrw, ldptrw_op) +DEF_EMIT_REG2I14_FORMAT(stptrw, stptrw_op) +DEF_EMIT_REG2I14_FORMAT(ldptrd, ldptrd_op) +DEF_EMIT_REG2I14_FORMAT(stptrd, stptrd_op) + +#define DEF_EMIT_REG2I16_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rd, \ + int offset) \ +{ \ + insn->reg2i16_format.opcode = OP; \ + insn->reg2i16_format.immediate = offset; \ + insn->reg2i16_format.rj = rj; \ + insn->reg2i16_format.rd = rd; \ +} + +DEF_EMIT_REG2I16_FORMAT(beq, beq_op) +DEF_EMIT_REG2I16_FORMAT(bne, bne_op) +DEF_EMIT_REG2I16_FORMAT(blt, blt_op) +DEF_EMIT_REG2I16_FORMAT(bge, bge_op) +DEF_EMIT_REG2I16_FORMAT(bltu, bltu_op) +DEF_EMIT_REG2I16_FORMAT(bgeu, bgeu_op) +DEF_EMIT_REG2I16_FORMAT(jirl, jirl_op) + +#define DEF_EMIT_REG2BSTRD_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + int msbd, \ + int lsbd) \ +{ \ + insn->reg2bstrd_format.opcode = OP; \ + insn->reg2bstrd_format.msbd = msbd; \ + insn->reg2bstrd_format.lsbd = lsbd; \ + insn->reg2bstrd_format.rj = rj; \ + insn->reg2bstrd_format.rd = rd; \ +} + +DEF_EMIT_REG2BSTRD_FORMAT(bstrpickd, bstrpickd_op) + +#define DEF_EMIT_REG3_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk) \ +{ \ + insn->reg3_format.opcode = OP; \ + insn->reg3_format.rd = rd; \ + insn->reg3_format.rj = rj; \ + insn->reg3_format.rk = rk; \ +} + +DEF_EMIT_REG3_FORMAT(addd, addd_op) +DEF_EMIT_REG3_FORMAT(subd, subd_op) +DEF_EMIT_REG3_FORMAT(muld, muld_op) +DEF_EMIT_REG3_FORMAT(divdu, divdu_op) +DEF_EMIT_REG3_FORMAT(moddu, moddu_op) +DEF_EMIT_REG3_FORMAT(and, and_op) +DEF_EMIT_REG3_FORMAT(or, or_op) +DEF_EMIT_REG3_FORMAT(xor, xor_op) +DEF_EMIT_REG3_FORMAT(sllw, sllw_op) +DEF_EMIT_REG3_FORMAT(slld, slld_op) +DEF_EMIT_REG3_FORMAT(srlw, srlw_op) +DEF_EMIT_REG3_FORMAT(srld, srld_op) +DEF_EMIT_REG3_FORMAT(sraw, sraw_op) +DEF_EMIT_REG3_FORMAT(srad, srad_op) +DEF_EMIT_REG3_FORMAT(ldxbu, ldxbu_op) +DEF_EMIT_REG3_FORMAT(ldxhu, ldxhu_op) +DEF_EMIT_REG3_FORMAT(ldxwu, ldxwu_op) +DEF_EMIT_REG3_FORMAT(ldxd, ldxd_op) +DEF_EMIT_REG3_FORMAT(stxb, stxb_op) +DEF_EMIT_REG3_FORMAT(stxh, stxh_op) +DEF_EMIT_REG3_FORMAT(stxw, stxw_op) +DEF_EMIT_REG3_FORMAT(stxd, stxd_op) +DEF_EMIT_REG3_FORMAT(amaddw, amaddw_op) +DEF_EMIT_REG3_FORMAT(amaddd, amaddd_op) +DEF_EMIT_REG3_FORMAT(amandw, amandw_op) +DEF_EMIT_REG3_FORMAT(amandd, amandd_op) +DEF_EMIT_REG3_FORMAT(amorw, amorw_op) +DEF_EMIT_REG3_FORMAT(amord, amord_op) +DEF_EMIT_REG3_FORMAT(amxorw, amxorw_op) +DEF_EMIT_REG3_FORMAT(amxord, amxord_op) +DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) +DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) + +#define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ +static inline void emit_##NAME(union loongarch_instruction *insn, \ + enum loongarch_gpr rd, \ + enum loongarch_gpr rj, \ + enum loongarch_gpr rk, \ + int imm) \ +{ \ + insn->reg3sa2_format.opcode = OP; \ + insn->reg3sa2_format.immediate = imm; \ + insn->reg3sa2_format.rd = rd; \ + insn->reg3sa2_format.rj = rj; \ + insn->reg3sa2_format.rk = rk; \ +} + +DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op) + #endif /* _ASM_INST_H */ diff --git a/arch/loongarch/include/uapi/asm/bpf_perf_event.h b/arch/loongarch/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..eb6e2fd2a1f0 --- /dev/null +++ b/arch/loongarch/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <linux/ptrace.h> + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/loongarch/net/Makefile b/arch/loongarch/net/Makefile new file mode 100644 index 000000000000..1ec12a0c324a --- /dev/null +++ b/arch/loongarch/net/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for arch/loongarch/net +# +# Copyright (C) 2022 Loongson Technology Corporation Limited +# +obj-$(CONFIG_BPF_JIT) += bpf_jit.o diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c new file mode 100644 index 000000000000..43f0a98efe38 --- /dev/null +++ b/arch/loongarch/net/bpf_jit.c @@ -0,0 +1,1179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * BPF JIT compiler for LoongArch + * + * Copyright (C) 2022 Loongson Technology Corporation Limited + */ +#include "bpf_jit.h" + +#define REG_TCC LOONGARCH_GPR_A6 +#define TCC_SAVED LOONGARCH_GPR_S5 + +#define SAVE_RA BIT(0) +#define SAVE_TCC BIT(1) + +static const int regmap[] = { + /* return value from in-kernel function, and exit value for eBPF program */ + [BPF_REG_0] = LOONGARCH_GPR_A5, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = LOONGARCH_GPR_A0, + [BPF_REG_2] = LOONGARCH_GPR_A1, + [BPF_REG_3] = LOONGARCH_GPR_A2, + [BPF_REG_4] = LOONGARCH_GPR_A3, + [BPF_REG_5] = LOONGARCH_GPR_A4, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = LOONGARCH_GPR_S0, + [BPF_REG_7] = LOONGARCH_GPR_S1, + [BPF_REG_8] = LOONGARCH_GPR_S2, + [BPF_REG_9] = LOONGARCH_GPR_S3, + /* read-only frame pointer to access stack */ + [BPF_REG_FP] = LOONGARCH_GPR_S4, + /* temporary register for blinding constants */ + [BPF_REG_AX] = LOONGARCH_GPR_T0, +}; + +static void mark_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_RA; +} + +static void mark_tail_call(struct jit_ctx *ctx) +{ + ctx->flags |= SAVE_TCC; +} + +static bool seen_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_RA); +} + +static bool seen_tail_call(struct jit_ctx *ctx) +{ + return (ctx->flags & SAVE_TCC); +} + +static u8 tail_call_reg(struct jit_ctx *ctx) +{ + if (seen_call(ctx)) + return TCC_SAVED; + + return REG_TCC; +} + +/* + * eBPF prog stack layout: + * + * high + * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP + * | $ra | + * +-------------------------+ + * | $fp | + * +-------------------------+ + * | $s0 | + * +-------------------------+ + * | $s1 | + * +-------------------------+ + * | $s2 | + * +-------------------------+ + * | $s3 | + * +-------------------------+ + * | $s4 | + * +-------------------------+ + * | $s5 | + * +-------------------------+ <--BPF_REG_FP + * | prog->aux->stack_depth | + * | (optional) | + * current $sp -------------> +-------------------------+ + * low + */ +static void build_prologue(struct jit_ctx *ctx) +{ + int stack_adjust = 0, store_offset, bpf_stack_adjust; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + + /* To store ra, fp, s0, s1, s2, s3, s4 and s5. */ + stack_adjust += sizeof(long) * 8; + + stack_adjust = round_up(stack_adjust, 16); + stack_adjust += bpf_stack_adjust; + + /* + * First instruction initializes the tail call count (TCC). + * On tail call we skip this instruction, and the TCC is + * passed in REG_TCC from the caller. + */ + emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust); + + store_offset = stack_adjust - sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset); + + store_offset -= sizeof(long); + emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust); + + if (bpf_stack_adjust) + emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust); + + /* + * Program contains calls and tail calls, so REG_TCC need + * to be saved across calls. + */ + if (seen_tail_call(ctx) && seen_call(ctx)) + move_reg(ctx, TCC_SAVED, REG_TCC); + + ctx->stack_size = stack_adjust; +} + +static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call) +{ + int stack_adjust = ctx->stack_size; + int load_offset; + + load_offset = stack_adjust - sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset); + + load_offset -= sizeof(long); + emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset); + + emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust); + + if (!is_tail_call) { + /* Set return value */ + move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]); + /* Return to the caller */ + emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0); + } else { + /* + * Call the next bpf prog and skip the first instruction + * of TCC initialization. + */ + emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1); + } +} + +static void build_epilogue(struct jit_ctx *ctx) +{ + __build_epilogue(ctx, false); +} + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} + +/* initialized on the first pass of build_body() */ +static int out_offset = -1; +static int emit_bpf_tail_call(struct jit_ctx *ctx) +{ + int off; + u8 tcc = tail_call_reg(ctx); + u8 a1 = LOONGARCH_GPR_A1; + u8 a2 = LOONGARCH_GPR_A2; + u8 t1 = LOONGARCH_GPR_T1; + u8 t2 = LOONGARCH_GPR_T2; + u8 t3 = LOONGARCH_GPR_T3; + const int idx0 = ctx->idx; + +#define cur_offset (ctx->idx - idx0) +#define jmp_offset (out_offset - (cur_offset)) + + /* + * a0: &ctx + * a1: &array + * a2: index + * + * if (index >= array->map.max_entries) + * goto out; + */ + off = offsetof(struct bpf_array, map.max_entries); + emit_insn(ctx, ldwu, t1, a1, off); + /* bgeu $a2, $t1, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0) + goto toofar; + + /* + * if (--TCC < 0) + * goto out; + */ + emit_insn(ctx, addid, REG_TCC, tcc, -1); + if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + emit_insn(ctx, alsld, t2, a2, a1, 2); + off = offsetof(struct bpf_array, ptrs); + emit_insn(ctx, ldd, t2, t2, off); + /* beq $t2, $zero, jmp_offset */ + if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + + /* goto *(prog->bpf_func + 4); */ + off = offsetof(struct bpf_prog, bpf_func); + emit_insn(ctx, ldd, t3, t2, off); + __build_epilogue(ctx, true); + + /* out: */ + if (out_offset == -1) + out_offset = cur_offset; + if (cur_offset != out_offset) { + pr_err_once("tail_call out_offset = %d, expected %d!\n", + cur_offset, out_offset); + return -1; + } + + return 0; + +toofar: + pr_info_once("tail_call: jump too far\n"); + return -1; +#undef cur_offset +#undef jmp_offset +} + +static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) +{ + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const u8 t3 = LOONGARCH_GPR_T3; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + const bool isdw = BPF_SIZE(insn->code) == BPF_DW; + + move_imm(ctx, t1, off, false); + emit_insn(ctx, addd, t1, dst, t1); + move_reg(ctx, t3, src); + + switch (imm) { + /* lock *(size *)(dst + off) <op>= src */ + case BPF_ADD: + if (isdw) + emit_insn(ctx, amaddd, t2, t1, src); + else + emit_insn(ctx, amaddw, t2, t1, src); + break; + case BPF_AND: + if (isdw) + emit_insn(ctx, amandd, t2, t1, src); + else + emit_insn(ctx, amandw, t2, t1, src); + break; + case BPF_OR: + if (isdw) + emit_insn(ctx, amord, t2, t1, src); + else + emit_insn(ctx, amorw, t2, t1, src); + break; + case BPF_XOR: + if (isdw) + emit_insn(ctx, amxord, t2, t1, src); + else + emit_insn(ctx, amxorw, t2, t1, src); + break; + /* src = atomic_fetch_<op>(dst + off, src) */ + case BPF_ADD | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amaddd, src, t1, t3); + } else { + emit_insn(ctx, amaddw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_AND | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amandd, src, t1, t3); + } else { + emit_insn(ctx, amandw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_OR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amord, src, t1, t3); + } else { + emit_insn(ctx, amorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + case BPF_XOR | BPF_FETCH: + if (isdw) { + emit_insn(ctx, amxord, src, t1, t3); + } else { + emit_insn(ctx, amxorw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* src = atomic_xchg(dst + off, src); */ + case BPF_XCHG: + if (isdw) { + emit_insn(ctx, amswapd, src, t1, t3); + } else { + emit_insn(ctx, amswapw, src, t1, t3); + emit_zext_32(ctx, src, true); + } + break; + /* r0 = atomic_cmpxchg(dst + off, r0, src); */ + case BPF_CMPXCHG: + u8 r0 = regmap[BPF_REG_0]; + + move_reg(ctx, t2, r0); + if (isdw) { + emit_insn(ctx, lld, r0, t1, 0); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scd, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4); + } else { + emit_insn(ctx, llw, r0, t1, 0); + emit_zext_32(ctx, t2, true); + emit_zext_32(ctx, r0, true); + emit_insn(ctx, bne, t2, r0, 4); + move_reg(ctx, t3, src); + emit_insn(ctx, scw, t3, t1, 0); + emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); + emit_zext_32(ctx, r0, true); + } + break; + } +} + +static bool is_signed_bpf_cond(u8 cond) +{ + return cond == BPF_JSGT || cond == BPF_JSLT || + cond == BPF_JSGE || cond == BPF_JSLE; +} + +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass) +{ + const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || + BPF_CLASS(insn->code) == BPF_JMP32; + const u8 code = insn->code; + const u8 cond = BPF_OP(code); + const u8 t1 = LOONGARCH_GPR_T1; + const u8 t2 = LOONGARCH_GPR_T2; + const u8 src = regmap[insn->src_reg]; + const u8 dst = regmap[insn->dst_reg]; + const s16 off = insn->off; + const s32 imm = insn->imm; + int jmp_offset; + int i = insn - ctx->prog->insnsi; + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + move_reg(ctx, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + move_imm(ctx, dst, imm, is32); + break; + + /* dst = dst + src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit_insn(ctx, addd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst + imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (is_signed_imm12(imm)) { + emit_insn(ctx, addid, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, addd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - src */ + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit_insn(ctx, subd, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst - imm */ + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (is_signed_imm12(-imm)) { + emit_insn(ctx, addid, dst, dst, -imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * src */ + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_insn(ctx, muld, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst * imm */ + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, muld, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / src */ + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst / imm */ + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, divdu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % src */ + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_zext_32(ctx, dst, is32); + move_reg(ctx, t1, src); + emit_zext_32(ctx, t1, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst % imm */ + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + move_imm(ctx, t1, imm, is32); + emit_zext_32(ctx, dst, is32); + emit_insn(ctx, moddu, dst, dst, t1); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & src */ + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit_insn(ctx, and, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst & imm */ + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, andi, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | src */ + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit_insn(ctx, or, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst | imm */ + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, ori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, or, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ src */ + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit_insn(ctx, xor, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst ^ imm */ + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + if (is_unsigned_imm12(imm)) { + emit_insn(ctx, xori, dst, dst, imm); + } else { + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, xor, dst, dst, t1); + } + emit_zext_32(ctx, dst, is32); + break; + + /* dst = dst << src (logical) */ + case BPF_ALU | BPF_LSH | BPF_X: + emit_insn(ctx, sllw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_insn(ctx, slld, dst, dst, src); + break; + + /* dst = dst << imm (logical) */ + case BPF_ALU | BPF_LSH | BPF_K: + emit_insn(ctx, slliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_LSH | BPF_K: + emit_insn(ctx, sllid, dst, dst, imm); + break; + + /* dst = dst >> src (logical) */ + case BPF_ALU | BPF_RSH | BPF_X: + emit_insn(ctx, srlw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_RSH | BPF_X: + emit_insn(ctx, srld, dst, dst, src); + break; + + /* dst = dst >> imm (logical) */ + case BPF_ALU | BPF_RSH | BPF_K: + emit_insn(ctx, srliw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_RSH | BPF_K: + emit_insn(ctx, srlid, dst, dst, imm); + break; + + /* dst = dst >> src (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_X: + emit_insn(ctx, sraw, dst, dst, src); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_insn(ctx, srad, dst, dst, src); + break; + + /* dst = dst >> imm (arithmetic) */ + case BPF_ALU | BPF_ARSH | BPF_K: + emit_insn(ctx, sraiw, dst, dst, imm); + emit_zext_32(ctx, dst, is32); + break; + + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_insn(ctx, sraid, dst, dst, imm); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + /* do nothing */ + break; + } + break; + + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + emit_insn(ctx, revb2h, dst, dst); + /* zero-extend 16 bits into 64 bits */ + emit_insn(ctx, bstrpickd, dst, dst, 15, 0); + break; + case 32: + emit_insn(ctx, revb2w, dst, dst); + /* zero-extend 32 bits into 64 bits */ + emit_zext_32(ctx, dst, is32); + break; + case 64: + emit_insn(ctx, revbd, dst, dst); + break; + } + break; + + /* PC += off if dst cond src */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + move_reg(ctx, t1, dst); + move_reg(ctx, t2, src); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t1, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t1, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst cond imm */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + u8 t7 = -1; + jmp_offset = bpf2la_offset(i, off, ctx); + if (imm) { + move_imm(ctx, t1, imm, false); + t7 = t1; + } else { + /* If imm is 0, simply use zero register. */ + t7 = LOONGARCH_GPR_ZERO; + } + move_reg(ctx, t2, dst); + if (is_signed_bpf_cond(BPF_OP(code))) { + emit_sext_32(ctx, t7, is32); + emit_sext_32(ctx, t2, is32); + } else { + emit_zext_32(ctx, t7, is32); + emit_zext_32(ctx, t2, is32); + } + if (emit_cond_jmp(ctx, cond, t2, t7, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst & src */ + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + jmp_offset = bpf2la_offset(i, off, ctx); + emit_insn(ctx, and, t1, dst, src); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off if dst & imm */ + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + jmp_offset = bpf2la_offset(i, off, ctx); + move_imm(ctx, t1, imm, is32); + emit_insn(ctx, and, t1, dst, t1); + emit_zext_32(ctx, t1, is32); + if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0) + goto toofar; + break; + + /* PC += off */ + case BPF_JMP | BPF_JA: + jmp_offset = bpf2la_offset(i, off, ctx); + if (emit_uncond_jmp(ctx, jmp_offset) < 0) + goto toofar; + break; + + /* function c |