Commit b664db8e authored by Leonardo Bras's avatar Leonardo Bras Committed by Michael Ellerman
Browse files

powerpc/rtas: Implement reentrant rtas call



Implement rtas_call_reentrant() for reentrant rtas-calls:
"ibm,int-on", "ibm,int-off", "ibm,get-xive" and "ibm,set-xive".

On LoPAPR Version 1.1 (March 24, 2016), from 7.3.10.1 to 7.3.10.4,
items 2 and 3 say:

2 - For the PowerPC External Interrupt option: The * call must be
reentrant to the number of processors on the platform.
3 - For the PowerPC External Interrupt option: The * argument call
buffer for each simultaneous call must be physically unique.

So, these rtas-calls can be made in a lockless way, provided that
each cpu making such an rtas call uses a different buffer.

For this, it was suggested to add the buffer (struct rtas_args)
to the PACA struct, so each cpu can have its own buffer.
The PACA struct received a pointer to the rtas buffer, which is
allocated in the memory range available to 32-bit rtas.

Reentrant rtas calls are useful to avoid deadlocks when crashing,
where rtas-calls are needed, but some other thread crashed while
holding the rtas.lock.

This is a backtrace of a deadlock from a kdump testing environment:

  #0 arch_spin_lock
  #1  lock_rtas ()
  #2  rtas_call (token=8204, nargs=1, nret=1, outputs=0x0)
  #3  ics_rtas_mask_real_irq (hw_irq=4100)
  #4  machine_kexec_mask_interrupts
  #5  default_machine_crash_shutdown
  #6  machine_crash_shutdown
  #7  __crash_kexec
  #8  crash_kexec
  #9  oops_end

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
[mpe: Move under #ifdef PSERIES to avoid build breakage]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200518234245.200672-3-leobras.c@gmail.com
parent 783a015b
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include <asm/hmi.h>
#include <asm/cpuidle.h>
#include <asm/atomic.h>
#include <asm/rtas-types.h>

#include <asm-generic/mmiowb_types.h>

@@ -256,6 +257,7 @@ struct paca_struct {
	u64 l1d_flush_size;
#endif
#ifdef CONFIG_PPC_PSERIES
	struct rtas_args *rtas_args_reentrant;
	u8 *mce_data_buf;		/* buffer to hold per cpu rtas errlog */
#endif /* CONFIG_PPC_PSERIES */

+1 −0
Original line number Diff line number Diff line
@@ -236,6 +236,7 @@ extern struct rtas_t rtas;
extern int rtas_token(const char *service);
extern int rtas_service_present(const char *service);
extern int rtas_call(int token, int, int, int *, ...);
/* Lockless variant for the reentrant RTAS calls; uses the calling CPU's PACA args buffer */
int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...);
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
			int nret, ...);
extern void __noreturn rtas_restart(char *cmd);
+32 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@
#include <asm/kexec.h>
#include <asm/svm.h>
#include <asm/ultravisor.h>
#include <asm/rtas.h>

#include "setup.h"

@@ -164,6 +165,30 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)

#endif /* CONFIG_PPC_BOOK3S_64 */

#ifdef CONFIG_PPC_PSERIES
/**
 * new_rtas_args() - Allocate a reentrant RTAS argument buffer for a CPU
 * @cpu:	CPU number the buffer is allocated for
 * @limit:	Memory limit requested by the caller for this allocation
 *
 * Nothing is allocated when the CPU runs in Hypervisor mode. Otherwise a
 * struct rtas_args is obtained from alloc_paca_data(), with the memory
 * limit capped at RTAS_INSTANTIATE_MAX.
 *
 * Return:	Pointer to the allocated rtas_args,
 *		NULL if the CPU is in Hypervisor mode
 */
static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit)
{
	unsigned long rtas_limit;

	if (early_cpu_has_feature(CPU_FTR_HVMODE))
		return NULL;

	/* Clamp the caller's limit to RTAS_INSTANTIATE_MAX */
	rtas_limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX);

	return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES,
			       rtas_limit, cpu);
}
#endif /* CONFIG_PPC_PSERIES */

/* The Paca is an array with one entry per processor.  Each contains an
 * lppaca, which contains the information shared between the
 * hypervisor and Linux.
@@ -202,6 +227,10 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int
	/* For now -- if we have threads this will be adjusted later */
	new_paca->tcd_ptr = &new_paca->tcd;
#endif

#ifdef CONFIG_PPC_PSERIES
	new_paca->rtas_args_reentrant = NULL;
#endif
}

/* Put the paca pointer into r13 and SPRG_PACA */
@@ -273,6 +302,9 @@ void __init allocate_paca(int cpu)
#endif
#ifdef CONFIG_PPC_BOOK3S_64
	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
	paca->rtas_args_reentrant = new_rtas_args(cpu, limit);
#endif
	paca_struct_size += sizeof(struct paca_struct);
}
+52 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@
#include <asm/time.h>
#include <asm/mmu.h>
#include <asm/topology.h>
#include <asm/paca.h>

/* This is here deliberately so it's only used in this file */
void enter_rtas(unsigned long);
@@ -1014,6 +1015,57 @@ int rtas_ibm_suspend_me(u64 handle)
	free_cpumask_var(offline_mask);
	return atomic_read(&data.error);
}

/**
 * rtas_call_reentrant() - Used for reentrant rtas calls
 * @token:	Token for desired reentrant RTAS call
 * @nargs:	Number of Input Parameters
 * @nret:	Number of Output Parameters
 * @outputs:	Array of outputs; when non-NULL it receives return words
 *		1..(nret - 1), so it must have room for (nret - 1) entries
 * @...:	Inputs for desired RTAS call
 *
 * According to LoPAPR documentation, only "ibm,int-on", "ibm,int-off",
 * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
 * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
 * the one pointed to by the current CPU's PACA instead. No lock is taken;
 * interrupts and preemption are disabled while the buffer is in use.
 *
 * Return:	-1 on error (no RTAS entry point, unknown token, or no
 *		reentrant args buffer for this CPU),
 *		First output value of RTAS call if (nret > 0),
 *		0 otherwise.
 */
int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
{
	va_list list;
	struct rtas_args *args;
	unsigned long flags;
	int i, ret = 0;

	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
		return -1;

	local_irq_save(flags);
	preempt_disable();

	/* We use the per-cpu (PACA) rtas args buffer */
	args = local_paca->rtas_args_reentrant;

	/*
	 * The PACA pointer is initialised to NULL and is left that way when
	 * no buffer was allocated for this CPU; fail the call instead of
	 * handing a NULL buffer to RTAS.
	 */
	if (!args) {
		ret = -1;
		goto out;
	}

	va_start(list, outputs);
	va_rtas_call_unlocked(args, token, nargs, nret, list);
	va_end(list);

	/* rets[0] is the call status; the remaining words go to the caller */
	if (nret > 1 && outputs)
		for (i = 0; i < nret - 1; ++i)
			outputs[i] = be32_to_cpu(args->rets[i + 1]);

	if (nret > 0)
		ret = be32_to_cpu(args->rets[0]);

out:
	local_irq_restore(flags);
	preempt_enable();

	return ret;
}

#else /* CONFIG_PPC_PSERIES */
int rtas_ibm_suspend_me(u64 handle)
{
+11 −11
Original line number Diff line number Diff line
@@ -50,8 +50,8 @@ static void ics_rtas_unmask_irq(struct irq_data *d)

	server = xics_get_irq_server(d->irq, irq_data_get_affinity_mask(d), 0);

	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq, server,
				DEFAULT_PRIORITY);
	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
					  server, DEFAULT_PRIORITY);
	if (call_status != 0) {
		printk(KERN_ERR
			"%s: ibm_set_xive irq %u server %x returned %d\n",
@@ -60,7 +60,7 @@ static void ics_rtas_unmask_irq(struct irq_data *d)
	}

	/* Now unmask the interrupt (often a no-op) */
	call_status = rtas_call(ibm_int_on, 1, 1, NULL, hw_irq);
	call_status = rtas_call_reentrant(ibm_int_on, 1, 1, NULL, hw_irq);
	if (call_status != 0) {
		printk(KERN_ERR "%s: ibm_int_on irq=%u returned %d\n",
			__func__, hw_irq, call_status);
@@ -91,7 +91,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
	if (hw_irq == XICS_IPI)
		return;

	call_status = rtas_call(ibm_int_off, 1, 1, NULL, hw_irq);
	call_status = rtas_call_reentrant(ibm_int_off, 1, 1, NULL, hw_irq);
	if (call_status != 0) {
		printk(KERN_ERR "%s: ibm_int_off irq=%u returned %d\n",
			__func__, hw_irq, call_status);
@@ -99,7 +99,7 @@ static void ics_rtas_mask_real_irq(unsigned int hw_irq)
	}

	/* Have to set XIVE to 0xff to be able to remove a slot */
	call_status = rtas_call(ibm_set_xive, 3, 1, NULL, hw_irq,
	call_status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL, hw_irq,
					  xics_default_server, 0xff);
	if (call_status != 0) {
		printk(KERN_ERR "%s: ibm_set_xive(0xff) irq=%u returned %d\n",
@@ -131,7 +131,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
	if (hw_irq == XICS_IPI || hw_irq == XICS_IRQ_SPURIOUS)
		return -1;

	status = rtas_call(ibm_get_xive, 1, 3, xics_status, hw_irq);
	status = rtas_call_reentrant(ibm_get_xive, 1, 3, xics_status, hw_irq);

	if (status) {
		printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
@@ -146,7 +146,7 @@ static int ics_rtas_set_affinity(struct irq_data *d,
		return -1;
	}

	status = rtas_call(ibm_set_xive, 3, 1, NULL,
	status = rtas_call_reentrant(ibm_set_xive, 3, 1, NULL,
				     hw_irq, irq_server, xics_status[1]);

	if (status) {
@@ -179,7 +179,7 @@ static int ics_rtas_map(struct ics *ics, unsigned int virq)
		return -EINVAL;

	/* Check if RTAS knows about this interrupt */
	rc = rtas_call(ibm_get_xive, 1, 3, status, hw_irq);
	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, hw_irq);
	if (rc)
		return -ENXIO;

@@ -198,7 +198,7 @@ static long ics_rtas_get_server(struct ics *ics, unsigned long vec)
{
	int rc, status[2];

	rc = rtas_call(ibm_get_xive, 1, 3, status, vec);
	rc = rtas_call_reentrant(ibm_get_xive, 1, 3, status, vec);
	if (rc)
		return -1;
	return status[0];