Commit 7db6c0f1 authored by Xu Kuohai's avatar Xu Kuohai Committed by Daniel Borkmann
Browse files

bpf, arm64: Optimize BPF store/load using arm64 str/ldr(immediate offset)



The current BPF store/load instruction is translated by the JIT into two
instructions. The first instruction moves the immediate offset into a
temporary register. The second instruction uses this temporary register
to do the real store/load.

In fact, arm64 supports addressing with immediate offsets. So This patch
introduces optimization that uses arm64 str/ldr instruction with immediate
offset when the offset fits.

Example of generated instuction for r2 = *(u64 *)(r1 + 0):

without optimization:
mov x10, 0
ldr x1, [x0, x10]

with optimization:
ldr x1, [x0, 0]

If the offset is negative, or is not aligned correctly, or exceeds max
value, rollback to the use of temporary register.

Signed-off-by: default avatarXu Kuohai <xukuohai@huawei.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220321152852.2334294-3-xukuohai@huawei.com
parent 30c90f67
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -66,6 +66,20 @@
#define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE)
#define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)

/* Load/store register (immediate offset) */
#define A64_LS_IMM(Rt, Rn, imm, size, type) \
	aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \
		AARCH64_INSN_SIZE_##size, \
		AARCH64_INSN_LDST_##type##_IMM_OFFSET)
#define A64_STRBI(Wt, Xn, imm)  A64_LS_IMM(Wt, Xn, imm, 8, STORE)
#define A64_LDRBI(Wt, Xn, imm)  A64_LS_IMM(Wt, Xn, imm, 8, LOAD)
#define A64_STRHI(Wt, Xn, imm)  A64_LS_IMM(Wt, Xn, imm, 16, STORE)
#define A64_LDRHI(Wt, Xn, imm)  A64_LS_IMM(Wt, Xn, imm, 16, LOAD)
#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE)
#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD)
#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE)
#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)

/* Load/store register pair */
#define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \
	aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \
+113 −15
Original line number Diff line number Diff line
@@ -191,6 +191,47 @@ static bool is_addsub_imm(u32 imm)
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}

/*
 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
 * Post-index, Pre-index, Unsigned offset.
 *
 * For BPF ldr/str, the "unsigned offset" type is sufficient.
 *
 * "Unsigned offset" type LDR(immediate) format:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" type STR(immediate) format:
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The offset is calculated from imm12 and scale in the following way:
 *
 * offset = (u64)imm12 << scale
 */
static bool is_lsi_offset(s16 offset, int scale)
{
	if (offset < 0)
		return false;

	if (offset > (0xFFF << scale))
		return false;

	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}

/* Tail call offset to jump into */
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
#define PROLOGUE_OFFSET 8
@@ -971,19 +1012,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
		emit_a64_mov_i(1, tmp, off, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off, 2)) {
				emit(A64_LDR32I(dst, src, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDR32(dst, src, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off, 1)) {
				emit(A64_LDRHI(dst, src, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDRH(dst, src, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off, 0)) {
				emit(A64_LDRBI(dst, src, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDRB(dst, src, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off, 3)) {
				emit(A64_LDR64I(dst, src, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_LDR64(dst, src, tmp), ctx);
			}
			break;
		}

@@ -1011,20 +1071,39 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		/* Load imm to a register then store it */
		emit_a64_mov_i(1, tmp2, off, ctx);
		emit_a64_mov_i(1, tmp, imm, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off, 2)) {
				emit(A64_STR32I(tmp, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR32(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off, 1)) {
				emit(A64_STRHI(tmp, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRH(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off, 0)) {
				emit(A64_STRBI(tmp, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STRB(tmp, dst, tmp2), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off, 3)) {
				emit(A64_STR64I(tmp, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp2, off, ctx);
				emit(A64_STR64(tmp, dst, tmp2), ctx);
			}
			break;
		}
		break;
@@ -1034,19 +1113,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		emit_a64_mov_i(1, tmp, off, ctx);
		switch (BPF_SIZE(code)) {
		case BPF_W:
			if (is_lsi_offset(off, 2)) {
				emit(A64_STR32I(src, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR32(src, dst, tmp), ctx);
			}
			break;
		case BPF_H:
			if (is_lsi_offset(off, 1)) {
				emit(A64_STRHI(src, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRH(src, dst, tmp), ctx);
			}
			break;
		case BPF_B:
			if (is_lsi_offset(off, 0)) {
				emit(A64_STRBI(src, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STRB(src, dst, tmp), ctx);
			}
			break;
		case BPF_DW:
			if (is_lsi_offset(off, 3)) {
				emit(A64_STR64I(src, dst, off), ctx);
			} else {
				emit_a64_mov_i(1, tmp, off, ctx);
				emit(A64_STR64(src, dst, tmp), ctx);
			}
			break;
		}
		break;