Commit af99da74 authored by Balamuruhan S's avatar Balamuruhan S Committed by Michael Ellerman
Browse files

powerpc/sstep: Support VSX vector paired storage access instructions



VSX Vector Paired instructions loads/stores an octword (32 bytes)
from/to storage into two sequential VSRs. Add emulation support
for these new instructions:
  * Load VSX Vector Paired (lxvp)
  * Load VSX Vector Paired Indexed (lxvpx)
  * Prefixed Load VSX Vector Paired (plxvp)
  * Store VSX Vector Paired (stxvp)
  * Store VSX Vector Paired Indexed (stxvpx)
  * Prefixed Store VSX Vector Paired (pstxvp)

[kernel test robot reported a build failure]

Reported-by: default avatarkernel test robot <lkp@intel.com>
Suggested-by: default avatarNaveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: default avatarBalamuruhan S <bala24@linux.ibm.com>
Signed-off-by: default avatarRavi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201011050908.72173-4-ravi.bangoria@linux.ibm.com
parent 1817de2f
Loading
Loading
Loading
Loading
+129 −21
Original line number Diff line number Diff line
@@ -32,6 +32,10 @@ extern char system_call_vectored_emulate[];
#define XER_OV32	0x00080000U
#define XER_CA32	0x00040000U

#ifdef CONFIG_VSX
#define VSX_REGISTER_XTP(rd)   ((((rd) & 1) << 5) | ((rd) & 0xfe))
#endif

#ifdef CONFIG_PPC_FPU
/*
 * Functions in ldstfp.S
@@ -279,6 +283,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
		up[1] = tmp;
		break;
	}
	case 32: {
		unsigned long *up = (unsigned long *)ptr;
		unsigned long tmp;

		tmp = byterev_8(up[0]);
		up[0] = byterev_8(up[3]);
		up[3] = tmp;
		tmp = byterev_8(up[2]);
		up[2] = byterev_8(up[1]);
		up[1] = tmp;
		break;
	}

#endif
	default:
		WARN_ON_ONCE(1);
@@ -709,6 +726,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
	reg->d[0] = reg->d[1] = 0;

	switch (op->element_size) {
	case 32:
		/* [p]lxvp[x] */
	case 16:
		/* whole vector; lxv[x] or lxvl[l] */
		if (size == 0)
@@ -717,7 +736,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
		if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
			rev = !rev;
		if (rev)
			do_byte_reverse(reg, 16);
			do_byte_reverse(reg, size);
		break;
	case 8:
		/* scalar loads, lxvd2x, lxvdsx */
@@ -793,6 +812,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
	size = GETSIZE(op->type);

	switch (op->element_size) {
	case 32:
		/* [p]stxvp[x] */
		if (size == 0)
			break;
		if (rev) {
			/* reverse 32 bytes */
			buf.d[0] = byterev_8(reg->d[3]);
			buf.d[1] = byterev_8(reg->d[2]);
			buf.d[2] = byterev_8(reg->d[1]);
			buf.d[3] = byterev_8(reg->d[0]);
			reg = &buf;
		}
		memcpy(mem, reg, size);
		break;
	case 16:
		/* stxv, stxvx, stxvl, stxvll */
		if (size == 0)
@@ -861,28 +894,43 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
				       bool cross_endian)
{
	int reg = op->reg;
	u8 mem[16];
	union vsx_reg buf;
	int i, j, nr_vsx_regs;
	u8 mem[32];
	union vsx_reg buf[2];
	int size = GETSIZE(op->type);

	if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
		return -EFAULT;

	emulate_vsx_load(op, &buf, mem, cross_endian);
	nr_vsx_regs = size / sizeof(__vector128);
	emulate_vsx_load(op, buf, mem, cross_endian);
	preempt_disable();
	if (reg < 32) {
		/* FP regs + extensions */
		if (regs->msr & MSR_FP) {
			load_vsrn(reg, &buf);
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				load_vsrn(reg + i, &buf[j].v);
			}
		} else {
			current->thread.fp_state.fpr[reg][0] = buf.d[0];
			current->thread.fp_state.fpr[reg][1] = buf.d[1];
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
				current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
			}
		}
	} else {
		if (regs->msr & MSR_VEC)
			load_vsrn(reg, &buf);
		else
			current->thread.vr_state.vr[reg - 32] = buf.v;
		if (regs->msr & MSR_VEC) {
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				load_vsrn(reg + i, &buf[j].v);
			}
		} else {
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
			}
		}
	}
	preempt_enable();
	return 0;
@@ -893,30 +941,45 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
					bool cross_endian)
{
	int reg = op->reg;
	u8 mem[16];
	union vsx_reg buf;
	int i, j, nr_vsx_regs;
	u8 mem[32];
	union vsx_reg buf[2];
	int size = GETSIZE(op->type);

	if (!address_ok(regs, ea, size))
		return -EFAULT;

	nr_vsx_regs = size / sizeof(__vector128);
	preempt_disable();
	if (reg < 32) {
		/* FP regs + extensions */
		if (regs->msr & MSR_FP) {
			store_vsrn(reg, &buf);
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				store_vsrn(reg + i, &buf[j].v);
			}
		} else {
			buf.d[0] = current->thread.fp_state.fpr[reg][0];
			buf.d[1] = current->thread.fp_state.fpr[reg][1];
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
				buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
			}
		}
	} else {
		if (regs->msr & MSR_VEC)
			store_vsrn(reg, &buf);
		else
			buf.v = current->thread.vr_state.vr[reg - 32];
		if (regs->msr & MSR_VEC) {
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				store_vsrn(reg + i, &buf[j].v);
			}
		} else {
			for (i = 0; i < nr_vsx_regs; i++) {
				j = IS_LE ? nr_vsx_regs - i - 1 : i;
				buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
			}
		}
	}
	preempt_enable();
	emulate_vsx_store(op, &buf, mem, cross_endian);
	emulate_vsx_store(op, buf, mem, cross_endian);
	return  copy_mem_out(mem, ea, size, regs);
}
#endif /* CONFIG_VSX */
@@ -2403,6 +2466,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
			op->vsx_flags = VSX_SPLAT;
			break;

		case 333:       /* lxvpx */
			if (!cpu_has_feature(CPU_FTR_ARCH_31))
				return -1;
			op->reg = VSX_REGISTER_XTP(rd);
			op->type = MKOP(LOAD_VSX, 0, 32);
			op->element_size = 32;
			break;

		case 364:	/* lxvwsx */
			op->reg = rd | ((word & 1) << 5);
			op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2431,6 +2502,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
				VSX_CHECK_VEC;
			break;
		}
		case 461:       /* stxvpx */
			if (!cpu_has_feature(CPU_FTR_ARCH_31))
				return -1;
			op->reg = VSX_REGISTER_XTP(rd);
			op->type = MKOP(STORE_VSX, 0, 32);
			op->element_size = 32;
			break;
		case 524:	/* lxsspx */
			op->reg = rd | ((word & 1) << 5);
			op->type = MKOP(LOAD_VSX, 0, 4);
@@ -2672,6 +2750,22 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#endif

#ifdef CONFIG_VSX
	case 6:
		if (!cpu_has_feature(CPU_FTR_ARCH_31))
			return -1;
		op->ea = dqform_ea(word, regs);
		op->reg = VSX_REGISTER_XTP(rd);
		op->element_size = 32;
		switch (word & 0xf) {
		case 0:         /* lxvp */
			op->type = MKOP(LOAD_VSX, 0, 32);
			break;
		case 1:         /* stxvp */
			op->type = MKOP(STORE_VSX, 0, 32);
			break;
		}
		break;

	case 61:	/* stfdp, lxv, stxsd, stxssp, stxv */
		switch (word & 7) {
		case 0:		/* stfdp with LSB of DS field = 0 */
@@ -2805,12 +2899,26 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
			case 57:	/* pld */
				op->type = MKOP(LOAD, PREFIXED, 8);
				break;
#ifdef CONFIG_VSX
			case 58:        /* plxvp */
				op->reg = VSX_REGISTER_XTP(rd);
				op->type = MKOP(LOAD_VSX, PREFIXED, 32);
				op->element_size = 32;
				break;
#endif /* CONFIG_VSX */
			case 60:        /* pstq */
				op->type = MKOP(STORE, PREFIXED, 16);
				break;
			case 61:	/* pstd */
				op->type = MKOP(STORE, PREFIXED, 8);
				break;
#ifdef CONFIG_VSX
			case 62:        /* pstxvp */
				op->reg = VSX_REGISTER_XTP(rd);
				op->type = MKOP(STORE_VSX, PREFIXED, 32);
				op->element_size = 32;
				break;
#endif /* CONFIG_VSX */
			}
			break;
		case 1: /* Type 01 Eight-Byte Register-to-Register */