Unverified Commit 1d194b6b authored by Mark Brown's avatar Mark Brown
Browse files

ASoC: SOF: Re-visit firmware state and panic tracking/handling

Merge series from Peter Ujfalusi <peter.ujfalusi@linux.intel.com>:

this series will improve how we are tracking the firmware's state to be able to
avoid communication with it when it is not going to answer due to a panic and
we will attempt to force power cycle the DSP to recover at the next runtime
suspend time.

The state handling brings in other improvements on the way the kernel reports
errors and DSP panics to reduce the printed lines for normal users, but at the
same time allowing developers (or for bug reports) to have more precise
information available to track down the issue.

We can now place messages easily in the correct debug level and not bound to the
static ERROR for some of the print chains, causing excess amount or partial
information to be printed, confusing users and machines (CI).

I would have prefered to split this series up, but it was developed together to
achieve a single goal to reduce the noise, but also provide the details we need
to be able to rootcause issues.
parents 8a2d8e4f 34bfba9a
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -17,6 +17,28 @@

struct snd_sof_dsp_ops;

/**
 * enum sof_fw_state - DSP firmware state definitions
 * @SOF_FW_BOOT_NOT_STARTED:	firmware boot is not yet started
 * @SOF_FW_BOOT_PREPARE:	preparing for boot (firmware loading for exaqmple)
 * @SOF_FW_BOOT_IN_PROGRESS:	firmware boot is in progress
 * @SOF_FW_BOOT_FAILED:		firmware boot failed
 * @SOF_FW_BOOT_READY_FAILED:	firmware booted but fw_ready op failed
 * @SOF_FW_BOOT_READY_OK:	firmware booted and fw_ready op passed
 * @SOF_FW_BOOT_COMPLETE:	firmware is booted up and functional
 * @SOF_FW_CRASHED:		firmware crashed after successful boot
 */
enum sof_fw_state {
	SOF_FW_BOOT_NOT_STARTED = 0,
	SOF_FW_BOOT_PREPARE,
	SOF_FW_BOOT_IN_PROGRESS,
	SOF_FW_BOOT_FAILED,
	SOF_FW_BOOT_READY_FAILED,
	SOF_FW_BOOT_READY_OK,
	SOF_FW_BOOT_COMPLETE,
	SOF_FW_CRASHED,
};

/*
 * SOF Platform data.
 */
+74 −45
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@
#endif

/* see SOF_DBG_ flags */
int sof_core_debug =  IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE);
static int sof_core_debug =  IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE);
module_param_named(sof_debug, sof_core_debug, int, 0444);
MODULE_PARM_DESC(sof_debug, "SOF core debug options (0x0 all off)");

@@ -27,6 +27,22 @@ MODULE_PARM_DESC(sof_debug, "SOF core debug options (0x0 all off)");
#define TIMEOUT_DEFAULT_IPC_MS  500
#define TIMEOUT_DEFAULT_BOOT_MS 2000

/**
 * sof_debug_check_flag - check if a given flag(s) is set in sof_core_debug
 * @mask: Flag or combination of flags to check
 *
 * Returns true if all bits set in mask is also set in sof_core_debug, otherwise
 * false
 */
bool sof_debug_check_flag(int mask)
{
	if ((sof_core_debug & mask) == mask)
		return true;

	return false;
}
EXPORT_SYMBOL(sof_debug_check_flag);

/*
 * FW Panic/fault handling.
 */
@@ -52,13 +68,23 @@ static const struct sof_panic_msg panic_msg[] = {
	{SOF_IPC_PANIC_ASSERT, "assertion failed"},
};

/*
/**
 * sof_print_oops_and_stack - Handle the printing of DSP oops and stack trace
 * @sdev: Pointer to the device's sdev
 * @level: prink log level to use for the printing
 * @panic_code: the panic code
 * @tracep_code: tracepoint code
 * @oops: Pointer to DSP specific oops data
 * @panic_info: Pointer to the received panic information message
 * @stack: Pointer to the call stack data
 * @stack_words: Number of words in the stack data
 *
 * helper to be called from .dbg_dump callbacks. No error code is
 * provided, it's left as an exercise for the caller of .dbg_dump
 * (typically IPC or loader)
 */
void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,
			u32 tracep_code, void *oops,
void sof_print_oops_and_stack(struct snd_sof_dev *sdev, const char *level,
			      u32 panic_code, u32 tracep_code, void *oops,
			      struct sof_ipc_panic_info *panic_info,
			      void *stack, size_t stack_words)
{
@@ -67,7 +93,7 @@ void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,

	/* is firmware dead ? */
	if ((panic_code & SOF_IPC_PANIC_MAGIC_MASK) != SOF_IPC_PANIC_MAGIC) {
		dev_err(sdev->dev, "unexpected fault %#010x trace %#010x\n",
		dev_printk(level, sdev->dev, "unexpected fault %#010x trace %#010x\n",
			   panic_code, tracep_code);
		return; /* no fault ? */
	}
@@ -76,54 +102,55 @@ void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,

	for (i = 0; i < ARRAY_SIZE(panic_msg); i++) {
		if (panic_msg[i].id == code) {
			dev_err(sdev->dev, "reason: %s (%#x)\n", panic_msg[i].msg,
				code & SOF_IPC_PANIC_CODE_MASK);
			dev_err(sdev->dev, "trace point: %#010x\n", tracep_code);
			dev_printk(level, sdev->dev, "reason: %s (%#x)\n",
				   panic_msg[i].msg, code & SOF_IPC_PANIC_CODE_MASK);
			dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code);
			goto out;
		}
	}

	/* unknown error */
	dev_err(sdev->dev, "unknown panic code: %#x\n", code & SOF_IPC_PANIC_CODE_MASK);
	dev_err(sdev->dev, "trace point: %#010x\n", tracep_code);
	dev_printk(level, sdev->dev, "unknown panic code: %#x\n",
		   code & SOF_IPC_PANIC_CODE_MASK);
	dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code);

out:
	dev_err(sdev->dev, "panic at %s:%d\n", panic_info->filename,
	dev_printk(level, sdev->dev, "panic at %s:%d\n", panic_info->filename,
		   panic_info->linenum);
	sof_oops(sdev, oops);
	sof_stack(sdev, oops, stack, stack_words);
	sof_oops(sdev, level, oops);
	sof_stack(sdev, level, oops, stack, stack_words);
}
EXPORT_SYMBOL(snd_sof_get_status);
EXPORT_SYMBOL(sof_print_oops_and_stack);

/*
 *			FW Boot State Transition Diagram
 *
 *    +-----------------------------------------------------------------------+
 *    +----------------------------------------------------------------------+
 *    |									     |
 * ------------------	     ------------------				     |
 * |		    |	     |		      |				     |
 * |   BOOT_FAILED  |	     |  READY_FAILED  |-------------------------+     |
 * |		    |	     |	              |				|     |
 * ------------------	     ------------------				|     |
 *	^			    ^					|     |
 *	|			    |					|     |
 * (FW Boot Timeout)		(FW_READY FAIL)				|     |
 *	|			    |					|     |
 *	|			    |					|     |
 * ------------------		    |		   ------------------	|     |
 * |   BOOT_FAILED  |<-------|  READY_FAILED  |				     |
 * |		    |<--+    |	              |	   ------------------	     |
 * ------------------	|    ------------------	   |		    |	     |
 *	^		|	    ^		   |	CRASHED	    |---+    |
 *	|		|	    |		   |		    |	|    |
 * (FW Boot Timeout)	|	(FW_READY FAIL)	   ------------------	|    |
 *	|		|	    |		     ^			|    |
 *	|		|	    |		     |(DSP Panic)	|    |
 * ------------------	|	    |		   ------------------	|    |
 * |		    |	|	    |		   |		    |	|    |
 * |   IN_PROGRESS  |---------------+------------->|    COMPLETE    |	|    |
 * |		    | (FW Boot OK)   (FW_READY OK) |		    |	|    |
 * ------------------				   ------------------	|     |
 *	^						|		|     |
 *	|						|		|     |
 * (FW Loading OK)			       (System Suspend/Runtime Suspend)
 *	|						|		|     |
 *	|						|		|     |
 * ------------------		------------------	|		|     |
 * |		    |		|		 |<-----+		|     |
 * |   PREPARE	    |		|   NOT_STARTED  |<---------------------+     |
 * |		    |		|		 |<---------------------------+
 * ------------------	|			   ------------------	|    |
 *	^		|				|		|    |
 *	|		|				|		|    |
 * (FW Loading OK)	|			(System Suspend/Runtime Suspend)
 *	|		|				|		|    |
 *	|	(FW Loading Fail)			|		|    |
 * ------------------	|	------------------	|		|    |
 * |		    |	|	|		 |<-----+		|    |
 * |   PREPARE	    |---+	|   NOT_STARTED  |<---------------------+    |
 * |		    |		|		 |<--------------------------+
 * ------------------		------------------
 *    |	    ^			    |	   ^
 *    |	    |			    |	   |
@@ -186,6 +213,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
	if (ret < 0) {
		dev_err(sdev->dev, "error: failed to load DSP firmware %d\n",
			ret);
		sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED);
		goto fw_load_err;
	}

@@ -199,10 +227,11 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
	if (ret < 0) {
		dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n",
			ret);
		sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED);
		goto fw_run_err;
	}

	if (sof_core_debug & SOF_DBG_ENABLE_TRACE) {
	if (sof_debug_check_flag(SOF_DBG_ENABLE_TRACE)) {
		sdev->dtrace_is_supported = true;

		/* init DMA trace */
+23 −12
Original line number Diff line number Diff line
@@ -930,7 +930,7 @@ void snd_sof_free_debug(struct snd_sof_dev *sdev)
EXPORT_SYMBOL_GPL(snd_sof_free_debug);

static const struct soc_fw_state_info {
	enum snd_sof_fw_state state;
	enum sof_fw_state state;
	const char *name;
} fw_state_dbg[] = {
	{SOF_FW_BOOT_NOT_STARTED, "SOF_FW_BOOT_NOT_STARTED"},
@@ -938,37 +938,47 @@ static const struct soc_fw_state_info {
	{SOF_FW_BOOT_IN_PROGRESS, "SOF_FW_BOOT_IN_PROGRESS"},
	{SOF_FW_BOOT_FAILED, "SOF_FW_BOOT_FAILED"},
	{SOF_FW_BOOT_READY_FAILED, "SOF_FW_BOOT_READY_FAILED"},
	{SOF_FW_BOOT_READY_OK, "SOF_FW_BOOT_READY_OK"},
	{SOF_FW_BOOT_COMPLETE, "SOF_FW_BOOT_COMPLETE"},
	{SOF_FW_CRASHED, "SOF_FW_CRASHED"},
};

static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev)
static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev, const char *level)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(fw_state_dbg); i++) {
		if (sdev->fw_state == fw_state_dbg[i].state) {
			dev_err(sdev->dev, "fw_state: %s (%d)\n", fw_state_dbg[i].name, i);
			dev_printk(level, sdev->dev, "fw_state: %s (%d)\n",
				   fw_state_dbg[i].name, i);
			return;
		}
	}

	dev_err(sdev->dev, "fw_state: UNKNOWN (%d)\n", sdev->fw_state);
	dev_printk(level, sdev->dev, "fw_state: UNKNOWN (%d)\n", sdev->fw_state);
}

void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, u32 flags)
void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags)
{
	bool print_all = !!(sof_core_debug & SOF_DBG_PRINT_ALL_DUMPS);
	char *level = flags & SOF_DBG_DUMP_OPTIONAL ? KERN_DEBUG : KERN_ERR;
	bool print_all = sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS);

	if (flags & SOF_DBG_DUMP_OPTIONAL && !print_all)
		return;

	if (sof_ops(sdev)->dbg_dump && !sdev->dbg_dump_printed) {
		dev_err(sdev->dev, "------------[ DSP dump start ]------------\n");
		snd_sof_dbg_print_fw_state(sdev);
		dev_printk(level, sdev->dev,
			   "------------[ DSP dump start ]------------\n");
		if (msg)
			dev_printk(level, sdev->dev, "%s\n", msg);
		snd_sof_dbg_print_fw_state(sdev, level);
		sof_ops(sdev)->dbg_dump(sdev, flags);
		dev_err(sdev->dev, "------------[ DSP dump end ]------------\n");
		dev_printk(level, sdev->dev,
			   "------------[ DSP dump end ]------------\n");
		if (!print_all)
			sdev->dbg_dump_printed = true;
	} else if (msg) {
		dev_printk(level, sdev->dev, "%s\n", msg);
	}
}
EXPORT_SYMBOL(snd_sof_dsp_dbg_dump);
@@ -979,7 +989,7 @@ static void snd_sof_ipc_dump(struct snd_sof_dev *sdev)
		dev_err(sdev->dev, "------------[ IPC dump start ]------------\n");
		sof_ops(sdev)->ipc_dump(sdev);
		dev_err(sdev->dev, "------------[ IPC dump end ]------------\n");
		if (!(sof_core_debug & SOF_DBG_PRINT_ALL_DUMPS))
		if (!sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS))
			sdev->ipc_dump_printed = true;
	}
}
@@ -987,7 +997,7 @@ static void snd_sof_ipc_dump(struct snd_sof_dev *sdev)
void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev)
{
	if (IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_RETAIN_DSP_CONTEXT) ||
	    (sof_core_debug & SOF_DBG_RETAIN_CTX)) {
	    sof_debug_check_flag(SOF_DBG_RETAIN_CTX)) {
		/* should we prevent DSP entering D3 ? */
		if (!sdev->ipc_dump_printed)
			dev_info(sdev->dev,
@@ -997,7 +1007,8 @@ void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev)

	/* dump vital information to the logs */
	snd_sof_ipc_dump(sdev);
	snd_sof_dsp_dbg_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX);
	snd_sof_dsp_dbg_dump(sdev, "Firmware exception",
			     SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX);
	snd_sof_trace_notify_for_error(sdev);
}
EXPORT_SYMBOL(snd_sof_handle_fw_exception);
+2 −2
Original line number Diff line number Diff line
@@ -69,8 +69,8 @@ void imx8_dump(struct snd_sof_dev *sdev, u32 flags)
			   IMX8_STACK_DUMP_SIZE);

	/* Print the information to the console */
	snd_sof_get_status(sdev, status, status, &xoops, &panic_info, stack,
			   IMX8_STACK_DUMP_SIZE);
	sof_print_oops_and_stack(sdev, KERN_ERR, status, status, &xoops,
				 &panic_info, stack, IMX8_STACK_DUMP_SIZE);
}
EXPORT_SYMBOL(imx8_dump);

+1 −1
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@ static void imx8_dsp_handle_request(struct imx_dsp_ipc *ipc)

	/* Check to see if the message is a panic code (0x0dead***) */
	if ((p & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC)
		snd_sof_dsp_panic(priv->sdev, p);
		snd_sof_dsp_panic(priv->sdev, p, true);
	else
		snd_sof_ipc_msgs_rx(priv->sdev);
}
Loading