Loading arch/arm/net/bpf_jit_32.c +43 −12 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/hwcap.h> #include <asm/opcodes.h> #include <asm/system_info.h> #include "bpf_jit_32.h" Loading Loading @@ -192,6 +193,7 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; unsigned int cpu_architecture; u32 flags; u32 *offsets; u32 *target; Loading Loading @@ -319,10 +321,12 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) #define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) #define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) /* Loading Loading @@ -533,10 +537,16 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); if (__LINUX_ARM_ARCH__ >= 6 || ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); } else { emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); } reg = tmp; } return reg; Loading @@ -558,10 +568,16 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); if (__LINUX_ARM_ARCH__ >= 6 || ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); } else { emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); } } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); Loading Loading @@ -711,13 +727,27 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { emit_a32_mov_r(dst_lo, src_lo, ctx); if (is64) { /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ emit_a32_mov_r(dst_lo, src_lo, ctx); emit_a32_mov_r(dst_hi, src_hi, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else if (is_stacked(src_lo)) { emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); } else if (is_stacked(dst_lo)) { emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else { /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); emit(ARM_MOV_R(dst[0], src[0]), ctx); emit(ARM_MOV_R(dst[1], src[1]), ctx); } } Loading Loading @@ -1778,6 +1808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.cpu_architecture = cpu_architecture(); /* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter Loading arch/arm/net/bpf_jit_32.h +2 −0 Original line number Diff line number Diff line Loading @@ -81,6 +81,7 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 #define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 #define ARM_INST_LDR_I 0x05100000 Loading Loading @@ -128,6 +129,7 @@ #define ARM_INST_STR_I 0x05000000 #define ARM_INST_STRB_I 0x05400000 #define ARM_INST_STRD_I 0x014000f0 #define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 Loading Loading
arch/arm/net/bpf_jit_32.c +43 −12 Original line number Diff line number Diff line Loading @@ -22,6 +22,7 @@ #include <asm/cacheflush.h> #include <asm/hwcap.h> #include <asm/opcodes.h> #include <asm/system_info.h> #include "bpf_jit_32.h" Loading Loading @@ -192,6 +193,7 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; unsigned int cpu_architecture; u32 flags; u32 *offsets; u32 *target; Loading Loading @@ -319,10 +321,12 @@ static u32 arm_bpf_ldst_imm8(u32 op, u8 rt, u8 rn, s16 imm8) #define ARM_LDR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDR_I, rt, rn, off) #define ARM_LDRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_LDRB_I, rt, rn, off) #define ARM_LDRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRD_I, rt, rn, off) #define ARM_LDRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_LDRH_I, rt, rn, off) #define ARM_STR_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STR_I, rt, rn, off) #define ARM_STRB_I(rt, rn, off) arm_bpf_ldst_imm12(ARM_INST_STRB_I, rt, rn, off) #define ARM_STRD_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRD_I, rt, rn, off) #define ARM_STRH_I(rt, rn, off) arm_bpf_ldst_imm8(ARM_INST_STRH_I, rt, rn, off) /* Loading Loading @@ -533,10 +537,16 @@ static const s8 *arm_bpf_get_reg64(const s8 *reg, const s8 *tmp, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); if (__LINUX_ARM_ARCH__ >= 6 || ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); } else { emit(ARM_LDR_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_LDR_I(tmp[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); } reg = tmp; } return reg; Loading @@ -558,10 +568,16 @@ static void arm_bpf_put_reg64(const s8 *reg, const s8 *src, struct jit_ctx *ctx) { if (is_stacked(reg[1])) { emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); if (__LINUX_ARM_ARCH__ >= 6 || ctx->cpu_architecture >= CPU_ARCH_ARMv5TE) { emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); } else { emit(ARM_STR_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[1])), ctx); emit(ARM_STR_I(src[0], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg[0])), ctx); } } else { if (reg[1] != src[1]) emit(ARM_MOV_R(reg[1], src[1]), ctx); Loading Loading @@ -711,13 +727,27 @@ static inline void emit_a32_mov_r(const s8 dst, const s8 src, static inline void emit_a32_mov_r64(const bool is64, const s8 dst[], const s8 src[], struct jit_ctx *ctx) { if (!is64) { emit_a32_mov_r(dst_lo, src_lo, ctx); if (is64) { /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); } else if (__LINUX_ARM_ARCH__ < 6 && ctx->cpu_architecture < CPU_ARCH_ARMv5TE) { /* complete 8 byte move */ emit_a32_mov_r(dst_lo, src_lo, ctx); emit_a32_mov_r(dst_hi, src_hi, ctx); } else if (is_stacked(src_lo) && is_stacked(dst_lo)) { const u8 *tmp = bpf2a32[TMP_REG_1]; emit(ARM_LDRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); emit(ARM_STRD_I(tmp[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else if (is_stacked(src_lo)) { emit(ARM_LDRD_I(dst[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(src_lo)), ctx); } else if (is_stacked(dst_lo)) { emit(ARM_STRD_I(src[1], ARM_FP, EBPF_SCRATCH_TO_ARM_FP(dst_lo)), ctx); } else { /* Zero out high 4 bytes */ emit_a32_mov_i(dst_hi, 0, ctx); emit(ARM_MOV_R(dst[0], src[0]), ctx); emit(ARM_MOV_R(dst[1], src[1]), ctx); } } Loading Loading @@ -1778,6 +1808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.cpu_architecture = cpu_architecture(); /* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter Loading
arch/arm/net/bpf_jit_32.h +2 −0 Original line number Diff line number Diff line Loading @@ -81,6 +81,7 @@ #define ARM_INST_LDST__IMM12 0x00000fff #define ARM_INST_LDRB_I 0x05500000 #define ARM_INST_LDRB_R 0x07d00000 #define ARM_INST_LDRD_I 0x014000d0 #define ARM_INST_LDRH_I 0x015000b0 #define ARM_INST_LDRH_R 0x019000b0 #define ARM_INST_LDR_I 0x05100000 Loading Loading @@ -128,6 +129,7 @@ #define ARM_INST_STR_I 0x05000000 #define ARM_INST_STRB_I 0x05400000 #define ARM_INST_STRD_I 0x014000f0 #define ARM_INST_STRH_I 0x014000b0 #define ARM_INST_TST_R 0x01100000 Loading