Commit c1a3daf7 authored by Alexei Starovoitov
Browse files

Merge branch 'Support bpf trampoline for s390x'

Ilya Leoshkevich says:

====================

v2: https://lore.kernel.org/bpf/20230128000650.1516334-1-iii@linux.ibm.com/#t


v2 -> v3:
- Make __arch_prepare_bpf_trampoline static.
  (Reported-by: kernel test robot <lkp@intel.com>)
- Support both old- and new-style map definitions in sk_assign. (Alexei)
- Trim DENYLIST.s390x. (Alexei)
- Adjust s390x vmlinux path in vmtest.sh.
- Drop merged fixes.

v1: https://lore.kernel.org/bpf/20230125213817.1424447-1-iii@linux.ibm.com/#t


v1 -> v2:
- Fix core_read_macros, sk_assign, test_profiler, test_bpffs (24/31;
  I'm not quite happy with the fix, but don't have better ideas),
  and xdp_synproxy. (Andrii)
- Prettify liburandom_read and verify_pkcs7_sig fixes. (Andrii)
- Fix bpf_usdt_arg using barrier_var(); prettify barrier_var(). (Andrii)
- Change BPF_MAX_TRAMP_LINKS to enum and query it using BTF. (Andrii)
- Improve bpf_jit_supports_kfunc_call() description. (Alexei)
- Always check sign_extend() return value.
- Cc: Alexander Gordeev.

Hi,

This series implements poke, trampoline, kfunc, and mixing subprogs
and tailcalls on s390x.

The following failures still remain:

#82      get_stack_raw_tp:FAIL
get_stack_print_output:FAIL:user_stack corrupted user stack
Known issue:
We cannot reliably unwind userspace on s390x without DWARF.

#101     ksyms_module:FAIL
address of kernel function bpf_testmod_test_mod_kfunc is out of range
Known issue:
Kernel and modules are too far away from each other on s390x.

#190     stacktrace_build_id:FAIL
Known issue:
We cannot reliably unwind userspace on s390x without DWARF.

#281     xdp_metadata:FAIL
See patch 6.

None of these seem to be due to the new changes.

Best regards,
Ilya
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 07dcbd73 ee105d5a
Loading
Loading
Loading
Loading
+677 −36
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include <asm/text-patching.h>
#include "bpf_jit.h"

struct bpf_jit {
@@ -50,12 +51,13 @@ struct bpf_jit {
	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
	int tail_call_start;	/* Tail call start offset */
	int excnt;		/* Number of exception table entries */
	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
	int prologue_plt;	/* Start of prologue hotpatch PLT */
};

#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
#define SEEN_LITERAL	BIT(1)		/* code uses literals */
#define SEEN_FUNC	BIT(2)		/* calls C functions */
#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)

/*
@@ -68,6 +70,10 @@ struct bpf_jit {
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_3		BPF_REG_2		/* Register 3 */
#define REG_4		BPF_REG_3		/* Register 4 */
#define REG_7		BPF_REG_6		/* Register 7 */
#define REG_8		BPF_REG_7		/* Register 8 */
#define REG_14		BPF_REG_0		/* Register 14 */

/*
@@ -506,21 +512,59 @@ static void bpf_skip(struct bpf_jit *jit, int size)
	}
}

/*
 * PLT for hotpatchable calls. The calling convention is the same as for the
 * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
 *
 * The assembly below is a read-only template. bpf_jit_plt() copies
 * BPF_PLT_SIZE bytes of it and then fills in the bpf_plt_ret and
 * bpf_plt_target slots of the copy. bpf_int_jit_compile() warns and returns
 * the unmodified program if bpf_plt_end - bpf_plt differs from BPF_PLT_SIZE.
 */
extern const char bpf_plt[];
extern const char bpf_plt_ret[];
extern const char bpf_plt_target[];
extern const char bpf_plt_end[];
#define BPF_PLT_SIZE 32
asm(
	".pushsection .rodata\n"
	"	.align 8\n"
	"bpf_plt:\n"
	/* Load the return address and the branch target from the slots below. */
	"	lgrl %r0,bpf_plt_ret\n"
	"	lgrl %r1,bpf_plt_target\n"
	"	br %r1\n"
	"	.align 8\n"
	/* Per-copy data slots; both are zero in the template. */
	"bpf_plt_ret: .quad 0\n"
	"bpf_plt_target: .quad 0\n"
	"bpf_plt_end:\n"
	"	.popsection\n"
);

/*
 * Instantiate the PLT template at @plt.
 *
 * @plt:    destination buffer, at least BPF_PLT_SIZE bytes
 * @ret:    value stored in the copy's bpf_plt_ret slot
 * @target: value stored in the copy's bpf_plt_target slot
 *
 * The slot offsets are computed with integer arithmetic on the symbol
 * addresses. bpf_plt, bpf_plt_ret and bpf_plt_target are declared as separate
 * array objects, and subtracting pointers into different objects is undefined
 * behavior in C; a compiler may then assume that the slot stores cannot alias
 * the memcpy() destination and reorder them.
 */
static void bpf_jit_plt(void *plt, void *ret, void *target)
{
	const unsigned long base = (unsigned long)bpf_plt;
	const unsigned long ret_off = (unsigned long)bpf_plt_ret - base;
	const unsigned long target_off = (unsigned long)bpf_plt_target - base;

	memcpy(plt, bpf_plt, BPF_PLT_SIZE);
	/*
	 * Use memcpy() instead of typed stores: callers may pass a plain char
	 * buffer, so make no alignment or effective-type assumptions.
	 */
	memcpy((char *)plt + ret_off, &ret, sizeof(ret));
	memcpy((char *)plt + target_off, &target, sizeof(target));
}

/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * See stack frame layout description in "bpf_jit.h"!
 */
static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
			     u32 stack_depth)
{
	if (jit->seen & SEEN_TAIL_CALL) {
	/* No-op for hotpatching */
	/* brcl 0,prologue_plt */
	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
	jit->prologue_plt_ret = jit->prg;

	if (fp->aux->func_idx == 0) {
		/* Initialize the tail call counter in the main program. */
		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
	} else {
		/*
		 * There are no tail calls. Insert nops in order to have
		 * tail_call_start at a predictable offset.
		 * Skip the tail call counter initialization in subprograms.
		 * Insert nops in order to have tail_call_start at a
		 * predictable offset.
		 */
		bpf_skip(jit, 6);
	}
@@ -557,6 +601,43 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
	}
}

/*
 * Emit an expoline for a jump that follows
 *
 * The caller must emit the indirect branch immediately after this sequence:
 * the exrl target (.+10) is the location right behind the exrl/j pair
 * emitted here.
 */
static void emit_expoline(struct bpf_jit *jit)
{
	/* exrl %r0,.+10 */
	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
	/* j . */
	EMIT4_PCREL(0xa7f40000, 0);
}

/*
 * Emit the __s390_indirect_jump_r1 thunk and record its offset in
 * jit->r1_thunk_ip. Nothing is emitted when expolines are not in use.
 */
static void emit_r1_thunk(struct bpf_jit *jit)
{
	if (!nospec_uses_trampoline())
		return;

	/* Remember where the thunk starts so that call_r1() can branch to it. */
	jit->r1_thunk_ip = jit->prg;
	emit_expoline(jit);
	/* br %r1 */
	_EMIT2(0x07f1);
}

/*
 * Emit a call to the address held in %r1, with the return address in %r14.
 * With expolines enabled the call goes through the __s390_indirect_jump_r1
 * thunk; otherwise it is a plain basr.
 */
static void call_r1(struct bpf_jit *jit)
{
	if (!nospec_uses_trampoline()) {
		/* basr %r14,%r1 */
		EMIT2(0x0d00, REG_14, REG_1);
		return;
	}

	/* brasl %r14,__s390_indirect_jump_r1 */
	EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
}

/*
 * Function epilogue
 */
@@ -570,25 +651,20 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
	if (nospec_uses_trampoline()) {
		jit->r14_thunk_ip = jit->prg;
		/* Generate __s390_indirect_jump_r14 thunk */
		/* exrl %r0,.+10 */
		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
		/* j . */
		EMIT4_PCREL(0xa7f40000, 0);
		emit_expoline(jit);
	}
	/* br %r14 */
	_EMIT2(0x07fe);

	if ((nospec_uses_trampoline()) &&
	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
		jit->r1_thunk_ip = jit->prg;
		/* Generate __s390_indirect_jump_r1 thunk */
		/* exrl %r0,.+10 */
		EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
		/* j . */
		EMIT4_PCREL(0xa7f40000, 0);
		/* br %r1 */
		_EMIT2(0x07f1);
	}
	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
		emit_r1_thunk(jit);

	jit->prg = ALIGN(jit->prg, 8);
	jit->prologue_plt = jit->prg;
	if (jit->prg_buf)
		bpf_jit_plt(jit->prg_buf + jit->prg,
			    jit->prg_buf + jit->prologue_plt_ret, NULL);
	jit->prg += BPF_PLT_SIZE;
}

static int get_probe_mem_regno(const u8 *insn)
@@ -662,6 +738,34 @@ static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
	return 0;
}

/*
 * Sign-extend register @r in place to 64 bits if @flags marks the value as
 * signed. @size is the width of the value in bytes.
 *
 * Returns 0 on success (including when nothing needs to be emitted) and -1
 * for a signed value of unsupported size.
 */
static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
{
	/* Unsigned values and full-width values need no instruction. */
	if (!(flags & BTF_FMODEL_SIGNED_ARG) || size == 8)
		return 0;

	if (size == 1) {
		/* lgbr %r,%r */
		EMIT4(0xb9060000, r, r);
	} else if (size == 2) {
		/* lghr %r,%r */
		EMIT4(0xb9070000, r, r);
	} else if (size == 4) {
		/* lgfr %r,%r */
		EMIT4(0xb9140000, r, r);
	} else {
		return -1;
	}

	return 0;
}

/*
 * Compile one eBPF instruction into s390x code
 *
@@ -1297,9 +1401,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
	 */
	case BPF_JMP | BPF_CALL:
	{
		u64 func;
		const struct btf_func_model *m;
		bool func_addr_fixed;
		int ret;
		int j, ret;
		u64 func;

		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
					    &func, &func_addr_fixed);
@@ -1308,15 +1413,38 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,

		REG_SET_SEEN(BPF_REG_5);
		jit->seen |= SEEN_FUNC;
		/*
		 * Copy the tail call counter to where the callee expects it.
		 *
		 * Note 1: The callee can increment the tail call counter, but
		 * we do not load it back, since the x86 JIT does not do this
		 * either.
		 *
		 * Note 2: We assume that the verifier does not let us call the
		 * main program, which clears the tail call counter on entry.
		 */
		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));

		/* Sign-extend the kfunc arguments. */
		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
			m = bpf_jit_find_kfunc_model(fp, insn);
			if (!m)
				return -1;

			for (j = 0; j < m->nr_args; j++) {
				if (sign_extend(jit, BPF_REG_1 + j,
						m->arg_size[j],
						m->arg_flags[j]))
					return -1;
			}
		}

		/* lgrl %w1,func */
		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
		if (nospec_uses_trampoline()) {
			/* brasl %r14,__s390_indirect_jump_r1 */
			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
		} else {
			/* basr %r14,%w1 */
			EMIT2(0x0d00, REG_14, REG_W1);
		}
		/* %r1() */
		call_r1(jit);
		/* lgr %b0,%r2: load return value into %b0 */
		EMIT4(0xb9040000, BPF_REG_0, REG_2);
		break;
@@ -1329,10 +1457,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
		 *  B1: pointer to ctx
		 *  B2: pointer to bpf_array
		 *  B3: index in bpf_array
		 */
		jit->seen |= SEEN_TAIL_CALL;

		/*
		 *
		 * if (index >= array->map.max_entries)
		 *         goto out;
		 */
@@ -1393,8 +1518,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
		/* lg %r1,bpf_func(%r1) */
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
			      offsetof(struct bpf_prog, bpf_func));
		if (nospec_uses_trampoline()) {
			jit->seen |= SEEN_FUNC;
			/* aghi %r1,tail_call_start */
			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
			/* brcl 0xf,__s390_indirect_jump_r1 */
			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
		} else {
			/* bc 0xf,tail_call_start(%r1) */
			_EMIT4(0x47f01000 + jit->tail_call_start);
		}
		/* out: */
		if (jit->prg_buf) {
			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
@@ -1688,7 +1821,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
	jit->prg = 0;
	jit->excnt = 0;

	bpf_jit_prologue(jit, stack_depth);
	bpf_jit_prologue(jit, fp, stack_depth);
	if (bpf_set_addr(jit, 0) < 0)
		return -1;
	for (i = 0; i < fp->len; i += insn_count) {
@@ -1768,6 +1901,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
	struct bpf_jit jit;
	int pass;

	if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
		return orig_fp;

	if (!fp->jit_requested)
		return orig_fp;

@@ -1859,3 +1995,508 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
					   tmp : orig_fp);
	return fp;
}

/* Capability query: this JIT handles BPF_PSEUDO_KFUNC_CALL instructions. */
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

/*
 * Patch a hotpatchable "brcl" at @ip so that it reaches @new_addr.
 *
 * @ip:       address of the 6-byte brcl instruction to patch
 * @t:        poke type; for BPF_MOD_JUMP a branch that already points
 *            directly at @new_addr is accepted without a PLT
 * @old_addr: target the site is expected to have now (NULL: branch is a nop)
 * @new_addr: target to install (NULL: turn the branch back into a nop)
 *
 * The branch normally points at a PLT (see bpf_jit_plt()); the PLT's target
 * slot is rewritten and then the branch mask is switched between 0 (nop) and
 * 0xf (always taken).
 *
 * Returns 0 on success, -EINVAL if the branch or the PLT does not look as
 * expected, or the error from copy_from_kernel_nofault().
 */
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *old_addr, void *new_addr)
{
	struct {
		u16 opc;
		s32 disp;
	} __packed insn;
	char expected_plt[BPF_PLT_SIZE];
	char current_plt[BPF_PLT_SIZE];
	char *plt;
	int err;

	/* Verify the branch to be patched. */
	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
	if (err < 0)
		return err;
	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
		return -EINVAL;

	if (t == BPF_MOD_JUMP &&
	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
		/*
		 * The branch already points to the destination,
		 * there is no PLT.
		 */
	} else {
		/* Verify the PLT. */
		plt = (char *)ip + (insn.disp << 1);
		err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
		if (err < 0)
			return err;
		bpf_jit_plt(expected_plt, (char *)ip + 6, old_addr);
		if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
			return -EINVAL;
		/*
		 * Adjust the call address. The slot offset is computed with
		 * integer arithmetic: bpf_plt_target and bpf_plt are distinct
		 * array objects, so subtracting the pointers directly would be
		 * undefined behavior.
		 */
		s390_kernel_write(plt + ((unsigned long)bpf_plt_target -
					 (unsigned long)bpf_plt),
				  &new_addr, sizeof(void *));
	}

	/*
	 * Adjust the mask of the branch. Only the second byte of the
	 * instruction, which holds the mask, is rewritten.
	 */
	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);

	/* Make the new code visible to the other CPUs. */
	text_poke_sync_lock();

	return 0;
}

/*
 * JIT state for generating a BPF trampoline: the generic JIT state plus the
 * trampoline's stack layout. The *_off members are displacements from %r15
 * as used by the generated code; they are assigned by
 * __arch_prepare_bpf_trampoline().
 */
struct bpf_tramp_jit {
	struct bpf_jit common;
	int orig_stack_args_off;/* Offset of arguments placed on stack by the
				 * func_addr's original caller
				 */
	int stack_size;		/* Trampoline stack size */
	int stack_args_off;	/* Offset of stack arguments for calling
				 * func_addr, has to be at the top
				 */
	int reg_args_off;	/* Offset of register arguments for calling
				 * func_addr
				 */
	int ip_off;		/* For bpf_get_func_ip(), has to be at
				 * (ctx - 16)
				 */
	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
				 * (ctx - 8)
				 */
	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
				 * of BPF arguments followed by return value
				 */
	int retval_off;		/* Offset of return value (see above) */
	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
				 * for __bpf_prog_enter() return value and
				 * func_addr respectively
				 */
	int r14_off;		/* Offset of saved %r14 */
	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
	int do_fexit;		/* do_fexit: label */
};

/*
 * Emit code that loads the 64-bit constant @val into @dst_reg, one 32-bit
 * half per instruction.
 */
static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
{
	const u32 val_hi = val >> 32;
	const u32 val_lo = val;

	/* llihf %dst_reg,val_hi */
	EMIT6_IMM(0xc00e0000, dst_reg, val_hi);
	/* oilf %dst_reg,val_lo */
	EMIT6_IMM(0xc00d0000, dst_reg, val_lo);
}

/*
 * Emit the code that runs one BPF program attached to the trampoline:
 * store the link's cookie into the run context, call the program's "enter"
 * function, call the program itself unless "enter" returned 0, optionally
 * store the program's return value, and finally call the "exit" function.
 *
 * @tjit:     trampoline JIT state
 * @m:        function model; only ret_size/ret_flags are read here
 * @tlink:    link providing the program and its cookie
 * @save_ret: whether to store the program's return value at retval_off
 *
 * Returns 0 on success and -1 if the return value cannot be sign-extended.
 */
static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
			   const struct btf_func_model *m,
			   struct bpf_tramp_link *tlink, bool save_ret)
{
	struct bpf_jit *jit = &tjit->common;
	int cookie_off = tjit->run_ctx_off +
			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
	struct bpf_prog *p = tlink->link.prog;
	int patch;

	/*
	 * run_ctx.cookie = tlink->cookie;
	 */

	/* %r0 = tlink->cookie */
	load_imm64(jit, REG_W0, tlink->cookie);
	/* stg %r0,cookie_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);

	/*
	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
	 *         goto skip;
	 */

	/* %r1 = __bpf_prog_enter */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* la %r3,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);
	/* ltgr %r7,%r2 */
	EMIT4(0xb9020000, REG_7, REG_2);
	/* brcl 8,skip */
	/* The displacement is not known yet; it is patched at "skip:" below. */
	patch = jit->prg;
	EMIT6_PCREL_RILC(0xc0040000, 8, 0);

	/*
	 * retval = bpf_func(args, p->insnsi);
	 */

	/* %r1 = p->bpf_func */
	load_imm64(jit, REG_1, (u64)p->bpf_func);
	/* la %r2,bpf_args_off(%r15) */
	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
	/* %r3 = p->insnsi */
	/* Only loaded when the program is not jited. */
	if (!p->jited)
		load_imm64(jit, REG_3, (u64)p->insnsi);
	/* %r1() */
	call_r1(jit);
	/* stg %r2,retval_off(%r15) */
	if (save_ret) {
		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
			return -1;
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	}

	/* skip: */
	/*
	 * Back-patch the brcl displacement (in halfwords) now that the
	 * target is known. Only done when an output buffer is present.
	 */
	if (jit->prg_buf)
		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;

	/*
	 * __bpf_prog_exit(p, start, &run_ctx);
	 */

	/* %r1 = __bpf_prog_exit */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* lgr %r3,%r7 */
	EMIT4(0xb9040000, REG_3, REG_7);
	/* la %r4,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);

	return 0;
}

/*
 * Reserve @size bytes in the trampoline stack frame being laid out.
 * Returns the offset of the reserved area, i.e. the frame size before the
 * reservation.
 */
static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
{
	const int offset = tjit->stack_size;

	tjit->stack_size = offset + size;
	return offset;
}

/* ABI uses %r2 - %r6 for parameter passing. */
#define MAX_NR_REG_ARGS 5

/* The "L" field of the "mvc" instruction is 8 bits. */
#define MAX_MVC_SIZE 256
#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))

/* -mfentry generates a 6-byte nop on s390x. */
#define S390X_PATCH_SIZE 6

/*
 * Emit one pass of the BPF trampoline for the function described by @m.
 *
 * @im:        trampoline image; ip_after_call and ip_epilogue are set here
 *             when BPF_TRAMP_F_CALL_ORIG is requested
 * @tjit:      trampoline JIT state; the stack layout fields are recomputed
 *             on every call
 * @m:         model (argument count, sizes, flags) of the traced function
 * @flags:     BPF_TRAMP_F_* flags
 * @tlinks:    fentry, fmod_ret and fexit links, indexed by BPF_TRAMP_*
 * @func_addr: address of the traced function, may be NULL
 *
 * Returns 0 on success, -ENOTSUPP if there are more stack arguments than a
 * single mvc can copy or an argument is larger than 16 bytes, and -EINVAL if
 * emitting a BPF program invocation fails.
 */
static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
					 struct bpf_tramp_jit *tjit,
					 const struct btf_func_model *m,
					 u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
{
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	int nr_bpf_args, nr_reg_args, nr_stack_args;
	struct bpf_jit *jit = &tjit->common;
	int arg, bpf_arg_off;
	int i, j;

	/* Support as many stack arguments as "mvc" instruction can handle. */
	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
	nr_stack_args = m->nr_args - nr_reg_args;
	if (nr_stack_args > MAX_NR_STACK_ARGS)
		return -ENOTSUPP;

	/* Return to %r14, since func_addr and %r0 are not available. */
	if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
		flags |= BPF_TRAMP_F_SKIP_FRAME;

	/*
	 * Compute how many arguments we need to pass to BPF programs.
	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
	 * smaller are packed into 1 or 2 registers; larger arguments are
	 * passed via pointers.
	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
	 * a register; larger arguments are passed via pointers.
	 * We need to deal with this difference.
	 */
	nr_bpf_args = 0;
	for (i = 0; i < m->nr_args; i++) {
		if (m->arg_size[i] <= 8)
			nr_bpf_args += 1;
		else if (m->arg_size[i] <= 16)
			nr_bpf_args += 2;
		else
			return -ENOTSUPP;
	}

	/*
	 * Calculate the stack layout.
	 */

	/* Reserve STACK_FRAME_OVERHEAD bytes for the callees. */
	tjit->stack_size = STACK_FRAME_OVERHEAD;
	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
	tjit->r14_off = alloc_stack(tjit, sizeof(u64));
	tjit->run_ctx_off = alloc_stack(tjit,
					sizeof(struct bpf_tramp_run_ctx));
	/* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
	tjit->stack_size -= STACK_FRAME_OVERHEAD;
	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;

	/* aghi %r15,-stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
	/* stmg %r2,%rN,reg_args_off(%r15) */
	if (nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/*
	 * Build the BPF context: i walks the function's arguments, j counts
	 * the 8-byte BPF argument slots filled so far.
	 */
	for (i = 0, j = 0; i < m->nr_args; i++) {
		/* arg is a register for register arguments, else a stack offset. */
		if (i < MAX_NR_REG_ARGS)
			arg = REG_2 + i;
		else
			arg = tjit->orig_stack_args_off +
			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
		if (m->arg_size[i] <= 8) {
			if (i < MAX_NR_REG_ARGS)
				/* stg %arg,bpf_arg_off(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
					      REG_0, REG_15, bpf_arg_off);
			else
				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
				_EMIT6(0xd207f000 | bpf_arg_off,
				       0xf000 | arg);
			j += 1;
		} else {
			/* 9..16 byte argument: copy 16 bytes through the pointer. */
			if (i < MAX_NR_REG_ARGS) {
				/* mvc bpf_arg_off(16,%r15),0(%arg) */
				_EMIT6(0xd20ff000 | bpf_arg_off,
				       reg2hex[arg] << 12);
			} else {
				/* lg %r1,arg(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
					      REG_15, arg);
				/* mvc bpf_arg_off(16,%r15),0(%r1) */
				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
			}
			j += 2;
		}
	}
	/* stmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* stg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);

	if (flags & BPF_TRAMP_F_ORIG_STACK) {
		/*
		 * The ftrace trampoline puts the return address (which is the
		 * address of the original function + S390X_PATCH_SIZE) into
		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
		 * ftrace_init_nop() for details.
		 */

		/* lgr %r8,%r0 */
		EMIT4(0xb9040000, REG_8, REG_0);
	} else {
		/* %r8 = func_addr + S390X_PATCH_SIZE */
		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
	}

	/*
	 * ip = func_addr;
	 * arg_cnt = nr_bpf_args;
	 */

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* %r0 = func_addr */
		load_imm64(jit, REG_0, (u64)func_addr);
		/* stg %r0,ip_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
			      tjit->ip_off);
	}
	/* lghi %r0,nr_bpf_args */
	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
	/* stg %r0,arg_cnt_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
		      tjit->arg_cnt_off);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * __bpf_tramp_enter(im);
		 */

		/* %r1 = __bpf_tramp_enter */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
	}

	for (i = 0; i < fentry->nr_links; i++)
		if (invoke_bpf_prog(tjit, m, fentry->links[i],
				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
			return -EINVAL;

	if (fmod_ret->nr_links) {
		/*
		 * retval = 0;
		 */

		/* xc retval_off(8,%r15),retval_off(%r15) */
		_EMIT6(0xd707f000 | tjit->retval_off,
		       0xf000 | tjit->retval_off);

		for (i = 0; i < fmod_ret->nr_links; i++) {
			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
				return -EINVAL;

			/*
			 * if (retval)
			 *         goto do_fexit;
			 */

			/* ltg %r0,retval_off(%r15) */
			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
				      tjit->retval_off);
			/* brcl 7,do_fexit */
			/*
			 * tjit->do_fexit is assigned further down; at this
			 * point it still holds whatever an earlier call stored
			 * in it.
			 */
			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * retval = func_addr(args);
		 */

		/* lmg %r2,%rN,reg_args_off(%r15) */
		if (nr_reg_args)
			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
				      REG_2 + (nr_reg_args - 1), REG_15,
				      tjit->reg_args_off);
		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
		if (nr_stack_args)
			_EMIT6(0xd200f000 |
				       (nr_stack_args * sizeof(u64) - 1) << 16 |
				       tjit->stack_args_off,
			       0xf000 | tjit->orig_stack_args_off);
		/* lgr %r1,%r8 */
		EMIT4(0xb9040000, REG_1, REG_8);
		/* %r1() */
		call_r1(jit);
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);

		im->ip_after_call = jit->prg_buf + jit->prg;

		/*
		 * The following nop will be patched by bpf_tramp_image_put().
		 */

		/* brcl 0,im->ip_epilogue */
		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
	}

	/* do_fexit: */
	tjit->do_fexit = jit->prg;
	for (i = 0; i < fexit->nr_links; i++)
		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
			return -EINVAL;

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = jit->prg_buf + jit->prg;

		/*
		 * __bpf_tramp_exit(im);
		 */

		/* %r1 = __bpf_tramp_exit */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
	}

	/* lmg %r2,%rN,reg_args_off(%r15) */
	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/* lgr %r1,%r8 */
	/* %r8 is restored just below, so move the branch target to %r1 first. */
	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
		EMIT4(0xb9040000, REG_1, REG_8);
	/* lmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* lg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
	/* lg %r2,retval_off(%r15) */
	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	/* aghi %r15,stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
	/* Emit an expoline for the following indirect jump. */
	if (nospec_uses_trampoline())
		emit_expoline(jit);
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* br %r14 */
		_EMIT2(0x07fe);
	else
		/* br %r1 */
		_EMIT2(0x07f1);

	/* Out-of-line thunk used by the call_r1() sites above. */
	emit_r1_thunk(jit);

	return 0;
}

/*
 * Generate a BPF trampoline into [image, image_end).
 *
 * The generator is run twice: first without an output buffer to compute the
 * offsets and the code size, then with @image as the output buffer.
 *
 * Returns the result of the second generator run on success, a negative
 * error from the generator, or -E2BIG if the code does not fit.
 */
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	struct bpf_tramp_jit tjit;
	int ret;

	/* Pass 1: compute offsets, check whether the code fits. */
	memset(&tjit, 0, sizeof(tjit));
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, tlinks,
					    func_addr);
	if (ret < 0)
		return ret;
	/* Use the same error code as for exceeding BPF_MAX_TRAMP_LINKS. */
	if (tjit.common.prg > (char *)image_end - (char *)image)
		return -E2BIG;

	/* Pass 2: generate the code. */
	tjit.common.prg = 0;
	tjit.common.prg_buf = image;
	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags, tlinks,
					    func_addr);
	if (ret < 0)
		return ret;
	/* Use the same error code as for exceeding BPF_MAX_TRAMP_LINKS. */
	if (tjit.common.prg > (char *)image_end - (char *)image)
		return -E2BIG;

	return ret;
}

/* Capability query: this JIT allows mixing subprograms and tail calls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
+4 −0
Original line number Diff line number Diff line
@@ -944,7 +944,11 @@ struct btf_func_model {
 * bytes on x86.
 */
enum {
#if defined(__s390x__)
	/* s390x uses a lower limit than the other architectures. */
	BPF_MAX_TRAMP_LINKS = 27,
#else
	BPF_MAX_TRAMP_LINKS = 38,
#endif
};

struct bpf_tramp_links {
+0 −69

File changed.

Preview size limit exceeded, changes collapsed.

+19 −6
Original line number Diff line number Diff line
@@ -29,7 +29,23 @@ static int stop, duration;
static bool
configure_stack(void)
{
	char tc_version[128];
	char tc_cmd[BUFSIZ];
	char *prog;
	FILE *tc;

	/* Check whether tc is built with libbpf. */
	tc = popen("tc -V", "r");
	if (CHECK_FAIL(!tc))
		return false;
	if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
		return false;
	if (strstr(tc_version, ", libbpf "))
		prog = "test_sk_assign_libbpf.bpf.o";
	else
		prog = "test_sk_assign.bpf.o";
	if (CHECK_FAIL(pclose(tc)))
		return false;

	/* Move to a new networking namespace */
	if (CHECK_FAIL(unshare(CLONE_NEWNET)))
@@ -46,8 +62,8 @@ configure_stack(void)
	/* Load qdisc, BPF program */
	if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
		return false;
	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
		       "direct-action object-file ./test_sk_assign.bpf.o",
	sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf",
		       "direct-action object-file", prog,
		       "section tc",
		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
	if (CHECK(system(tc_cmd), "BPF load failed;",
@@ -129,15 +145,12 @@ get_port(int fd)
/*
 * Receive one message from @srv_client and discard the payload.
 *
 * @srv_client: socket to read from
 * @type:       SOCK_STREAM to use read(); anything else uses recvfrom()
 *
 * Returns the byte count reported by read()/recvfrom(), or -1 on error.
 *
 * The sender's address is not needed, so NULL is passed for both the address
 * and its length; this avoids handing recvfrom() an uninitialized
 * value-result length.
 */
static ssize_t
rcv_msg(int srv_client, int type)
{
	char buf[BUFSIZ];

	if (type == SOCK_STREAM)
		return read(srv_client, &buf, sizeof(buf));
	else
		return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL);
}

static int
+11 −0
Original line number Diff line number Diff line
@@ -16,6 +16,16 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#if defined(IPROUTE2_HAVE_LIBBPF)
/* Use a new-style map definition. */
struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__type(key, int);
	__type(value, __u64);
	__uint(pinning, LIBBPF_PIN_BY_NAME);
	__uint(max_entries, 1);
} server_map SEC(".maps");
#else
/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
#define PIN_GLOBAL_NS 2

@@ -35,6 +45,7 @@ struct {
	.max_elem = 1,
	.pinning = PIN_GLOBAL_NS,
};
#endif

char _license[] SEC("license") = "GPL";

Loading