Unverified commit c07e067c, authored by openeuler-ci-bot, committed by Gitee
Browse files

!1268 [sync] PR-1070: crypto: hisilicon - fix some reset problem

Merge Pull Request from: @openeuler-sync-bot 
 

Origin pull request: 
https://gitee.com/openeuler/kernel/pulls/1070 
 
Fix some reset problems in the accelerator drivers.

Weili Qian (5):
crypto: hisilicon/qm - flush all work before driver removed.
crypto: hisilicon/hpre - enable sva error interrupt event
crypto: hisilicon/qm - remove duplicate assignment and release
crypto: hisilicon/qm - disable same error report before resetting
crypto: hisilicon/qm - disable error report before flr 
 
Link: https://gitee.com/openeuler/kernel/pulls/1268

 

Reviewed-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents 4554a847 fedc2807
Loading
Loading
Loading
Loading
+25 −5
Original line number Diff line number Diff line
@@ -202,7 +202,7 @@ static const struct hisi_qm_cap_info hpre_basic_info[] = {
	{HPRE_QM_RESET_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0xC37, 0x6C37},
	{HPRE_QM_OOO_SHUTDOWN_MASK_CAP, 0x3128, 0, GENMASK(31, 0), 0x0, 0x4, 0x6C37},
	{HPRE_QM_CE_MASK_CAP, 0x312C, 0, GENMASK(31, 0), 0x0, 0x8, 0x8},
	{HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xFFFFFE},
	{HPRE_NFE_MASK_CAP, 0x3130, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0x1FFFFFE},
	{HPRE_RESET_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x3FFFFE, 0xBFFFFE},
	{HPRE_OOO_SHUTDOWN_MASK_CAP, 0x3134, 0, GENMASK(31, 0), 0x0, 0x22, 0xBFFFFE},
	{HPRE_CE_MASK_CAP, 0x3138, 0, GENMASK(31, 0), 0x0, 0x1, 0x1},
@@ -278,6 +278,9 @@ static const struct hpre_hw_error hpre_hw_errors[] = {
	}, {
		.int_msk = BIT(23),
		.msg = "sva_fsm_timeout_int_set"
	}, {
		.int_msk = BIT(24),
		.msg = "sva_int_set"
	}, {
		/* sentinel */
	}
@@ -1262,11 +1265,26 @@ static u32 hpre_get_hw_err_status(struct hisi_qm *qm)

/*
 * Write @err_sts to the HPRE error source register (HPRE_HAC_SOURCE_INT),
 * then rewrite HPRE_RAS_NFE_ENB with the NFE mask that hpre_basic_info
 * provides for qm->cap_ver.
 *
 * @qm: QM device; registers are addressed relative to qm->io_base.
 * @err_sts: error bits written to the source register.
 *
 * NOTE(review): this listing is a diff whose +/- markers were stripped; the
 * NFE lines below may belong to the removed side of the patch -- confirm
 * against the tree before relying on them.
 */
static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
{
	u32 nfe;

	writel(err_sts, qm->io_base + HPRE_HAC_SOURCE_INT);
	/* Re-program NFE reporting with the mask from the capability table. */
	nfe = hisi_qm_get_hw_info(qm, hpre_basic_info, HPRE_NFE_MASK_CAP, qm->cap_ver);
	writel(nfe, qm->io_base + HPRE_RAS_NFE_ENB);
}

/*
 * Stop NFE reporting for the error types set in @err_type.
 *
 * The NFE mask for qm->cap_ver is looked up in hpre_basic_info, the bits
 * in @err_type are cleared from it, and the result is written to
 * HPRE_RAS_NFE_ENB.
 */
static void hpre_disable_error_report(struct hisi_qm *qm, u32 err_type)
{
	u32 supported = hisi_qm_get_hw_info(qm, hpre_basic_info,
					    HPRE_NFE_MASK_CAP, qm->cap_ver);
	u32 enabled = supported & ~err_type;

	writel(enabled, qm->io_base + HPRE_RAS_NFE_ENB);
}

/*
 * Program NFE and CE reporting with the masks that hpre_basic_info
 * provides for qm->cap_ver: NFE goes to HPRE_RAS_NFE_ENB, CE to
 * HPRE_RAS_CE_ENB.
 */
static void hpre_enable_error_report(struct hisi_qm *qm)
{
	u32 nfe = hisi_qm_get_hw_info(qm, hpre_basic_info,
				      HPRE_NFE_MASK_CAP, qm->cap_ver);
	u32 ce = hisi_qm_get_hw_info(qm, hpre_basic_info,
				     HPRE_CE_MASK_CAP, qm->cap_ver);

	writel(nfe, qm->io_base + HPRE_RAS_NFE_ENB);
	writel(ce, qm->io_base + HPRE_RAS_CE_ENB);
}

static void hpre_open_axi_master_ooo(struct hisi_qm *qm)
@@ -1306,6 +1324,8 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
	.hw_err_disable		= hpre_hw_error_disable,
	.get_dev_hw_err_status	= hpre_get_hw_err_status,
	.clear_dev_hw_err_status = hpre_clear_hw_err_status,
	.disable_error_report   = hpre_disable_error_report,
	.enable_error_report	= hpre_enable_error_report,
	.log_dev_hw_err		= hpre_log_hw_error,
	.open_axi_master_ooo	= hpre_open_axi_master_ooo,
	.open_sva_prefetch	= hpre_open_sva_prefetch,
+88 −46
Original line number Diff line number Diff line
@@ -206,7 +206,6 @@
#define WAIT_PERIOD			20
#define REMOVE_WAIT_DELAY		10

#define QM_DRIVER_REMOVING		0
#define QM_QOS_PARAM_NUM		2
#define QM_QOS_MAX_VAL			1000
#define QM_QOS_RATE			100
@@ -376,7 +375,7 @@ struct hisi_qm_hw_ops {
	int (*debug_init)(struct hisi_qm *qm);
	void (*hw_error_init)(struct hisi_qm *qm);
	void (*hw_error_uninit)(struct hisi_qm *qm);
	enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm);
	enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm, bool need_reset);
	int (*set_msi)(struct hisi_qm *qm, bool set);
};

@@ -1115,6 +1114,11 @@ static irqreturn_t qm_mb_cmd_irq(int irq, void *data)
	if (!val)
		return IRQ_NONE;

	if (test_bit(QM_DRIVER_DOWN, &qm->misc_ctl)) {
		dev_warn(&qm->pdev->dev, "Driver is down, message cannot be processed!\n");
		return IRQ_HANDLED;
	}

	schedule_work(&qm->cmd_process);

	return IRQ_HANDLED;
@@ -1565,24 +1569,35 @@ static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
	}
}

static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm, bool need_reset)
{
	u32 error_status, tmp;

	/* read err sts */
	tmp = readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
	error_status = qm->error_mask & tmp;

	error_status = tmp & (~qm->err_info.qm_err_type);
	qm->err_info.qm_err_type |= tmp;
	if (error_status) {
		if (error_status & QM_ECC_MBIT)
			qm->err_status.is_qm_ecc_mbit = true;

		qm_log_hw_error(qm, error_status);
		if (error_status & qm->err_info.qm_reset_mask)
		/* If the device is ready to reset, only print new error type. */
		if (!need_reset)
			return ACC_ERR_RECOVERED;

		if (error_status & qm->err_info.qm_reset_mask) {
			/* Disable the same error reporting until the error is recovered. */
			writel(qm->err_info.nfe & (~qm->err_info.qm_err_type),
			       qm->io_base + QM_RAS_NFE_ENABLE);
			return ACC_ERR_NEED_RESET;
		}

		writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE);
		/* Clear error source if not need reset. */
		writel(qm->err_info.qm_err_type, qm->io_base + QM_ABNORMAL_INT_SOURCE);
		/* Avoid bios disable error type in v2 version, re-enable. */
		writel(qm->err_info.nfe, qm->io_base + QM_RAS_NFE_ENABLE);
		writel(qm->err_info.ce, qm->io_base + QM_RAS_CE_ENABLE);
	}

	return ACC_ERR_RECOVERED;
@@ -2800,7 +2815,6 @@ static int qm_alloc_uacce(struct hisi_qm *qm)
	qm->uacce = uacce;

	qm_uacce_base_init(qm);
	qm->uacce = uacce;
	INIT_LIST_HEAD(&qm->isolate_data.qm_hw_errs);
	mutex_init(&qm->isolate_data.isolate_lock);

@@ -2826,7 +2840,7 @@ EXPORT_SYMBOL_GPL(qm_register_uacce);
 */
static int qm_frozen(struct hisi_qm *qm)
{
	if (test_bit(QM_DRIVER_REMOVING, &qm->misc_ctl))
	if (test_bit(QM_DRIVER_DOWN, &qm->misc_ctl))
		return 0;

	down_write(&qm->qps_lock);
@@ -2834,7 +2848,7 @@ static int qm_frozen(struct hisi_qm *qm)
	if (!qm->qp_in_used) {
		qm->qp_in_used = qm->qp_num;
		up_write(&qm->qps_lock);
		set_bit(QM_DRIVER_REMOVING, &qm->misc_ctl);
		set_bit(QM_DRIVER_DOWN, &qm->misc_ctl);
		return 0;
	}

@@ -2891,6 +2905,9 @@ void hisi_qm_wait_task_finish(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
	       test_bit(QM_RESETTING, &qm->misc_ctl))
		msleep(WAIT_PERIOD);

	if (test_bit(QM_SUPPORT_MB_COMMAND, &qm->caps))
		flush_work(&qm->cmd_process);

	udelay(REMOVE_WAIT_DELAY);
}
EXPORT_SYMBOL_GPL(hisi_qm_wait_task_finish);
@@ -3106,10 +3123,6 @@ void hisi_qm_uninit(struct hisi_qm *qm)
	qm_remove_uacce(qm);
	qm_irqs_unregister(qm);
	hisi_qm_pci_uninit(qm);
	if (qm->use_sva) {
		uacce_remove(qm->uacce);
		qm->uacce = NULL;
	}
}
EXPORT_SYMBOL_GPL(hisi_qm_uninit);

@@ -3444,14 +3457,14 @@ static void qm_hw_error_uninit(struct hisi_qm *qm)
	qm->ops->hw_error_uninit(qm);
}

static enum acc_err_result qm_hw_error_handle(struct hisi_qm *qm)
static enum acc_err_result qm_hw_error_handle(struct hisi_qm *qm, bool need_reset)
{
	if (!qm->ops->hw_error_handle) {
		dev_err(&qm->pdev->dev, "QM doesn't support hw error report!\n");
		return ACC_ERR_NONE;
	}

	return qm->ops->hw_error_handle(qm);
	return qm->ops->hw_error_handle(qm, need_reset);
}

/**
@@ -4075,17 +4088,19 @@ int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs)
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_configure);

static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm)
static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm, bool need_reset)
{
	u32 err_sts;
	u32 err_sts, tmp;

	if (!qm->err_ini->get_dev_hw_err_status) {
		dev_err(&qm->pdev->dev, "Device doesn't support get hw error status!\n");
		return ACC_ERR_NONE;
	}

	/* get device hardware error status */
	err_sts = qm->err_ini->get_dev_hw_err_status(qm);
	/* Get device hardware new error status */
	tmp = qm->err_ini->get_dev_hw_err_status(qm);
	err_sts = tmp & (~qm->err_info.dev_err_type);
	qm->err_info.dev_err_type |= tmp;
	if (err_sts) {
		if (err_sts & qm->err_info.ecc_2bits_mask)
			qm->err_status.is_dev_ecc_mbit = true;
@@ -4093,11 +4108,21 @@ static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm)
		if (qm->err_ini->log_dev_hw_err)
			qm->err_ini->log_dev_hw_err(qm, err_sts);

		if (err_sts & qm->err_info.dev_reset_mask)
		/* If the device is ready to reset, only print new error type. */
		if (!need_reset)
			return ACC_ERR_RECOVERED;

		if (err_sts & qm->err_info.dev_reset_mask) {
			/* Disable the same error reporting until the error is recovered. */
			qm->err_ini->disable_error_report(qm, qm->err_info.dev_err_type);
			return ACC_ERR_NEED_RESET;
		}

		if (qm->err_ini->clear_dev_hw_err_status)
			qm->err_ini->clear_dev_hw_err_status(qm, err_sts);
		/* Clear error source if not need reset. */
		if (qm->err_ini->clear_dev_hw_err_status) {
			qm->err_ini->clear_dev_hw_err_status(qm, qm->err_info.dev_err_type);
			qm->err_ini->enable_error_report(qm);
		}
	}

	return ACC_ERR_RECOVERED;
@@ -4106,16 +4131,25 @@ static enum acc_err_result qm_dev_err_handle(struct hisi_qm *qm)
static enum acc_err_result qm_process_dev_error(struct hisi_qm *qm)
{
	enum acc_err_result qm_ret, dev_ret;
	bool need_reset = true;

	if (!test_bit(QM_RST_SCHED, &qm->misc_ctl)) {
		qm->err_info.qm_err_type = 0;
		qm->err_info.dev_err_type = 0;
	} else {
		need_reset = false;
	}

	/* log qm error */
	qm_ret = qm_hw_error_handle(qm);
	qm_ret = qm_hw_error_handle(qm, need_reset);

	/* log device error */
	dev_ret = qm_dev_err_handle(qm);
	dev_ret = qm_dev_err_handle(qm, need_reset);
	if (need_reset && (qm_ret == ACC_ERR_NEED_RESET ||
	    dev_ret == ACC_ERR_NEED_RESET))
		return ACC_ERR_NEED_RESET;

	return (qm_ret == ACC_ERR_NEED_RESET ||
		dev_ret == ACC_ERR_NEED_RESET) ?
		ACC_ERR_NEED_RESET : ACC_ERR_RECOVERED;
	return ACC_ERR_RECOVERED;
}

/**
@@ -4315,8 +4349,6 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm)
	if (ret)
		pci_err(pdev, "failed to stop by vfs in soft reset!\n");

	clear_bit(QM_RST_SCHED, &qm->misc_ctl);

	return 0;
}

@@ -4555,6 +4587,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
			return ret;
		}
	}
	clear_bit(QM_RST_SCHED, &qm->misc_ctl);

	ret = qm_dev_hw_init(qm);
	if (ret) {
@@ -4628,10 +4661,11 @@ static int qm_controller_reset(struct hisi_qm *qm)

err_reset:
	pci_err(pdev, "Controller reset failed (%d)\n", ret);
	clear_bit(QM_RST_SCHED, &qm->misc_ctl);
	qm_reset_bit_clear(qm);

	/* if resetting fails, isolate the device */
	if (qm->use_sva)
	if (qm->use_uacce)
		qm->isolate_data.is_isolate = true;
	return ret;
}
@@ -4669,22 +4703,30 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
	u32 delay = 0;
	int ret;

	hisi_qm_dev_err_uninit(pf_qm);
	while (true) {
		ret = qm_reset_prepare_ready(qm);
		if (ret) {
			pci_err(pdev, "FLR not ready!\n");
			return;
		}

		hisi_qm_dev_err_uninit(pf_qm);
		/*
	 * Check whether there is an ECC mbit error, If it occurs, need to
	 * wait for soft reset to fix it.
		 * Check whether there is an ECC mbit error,
		 * If it occurs, need to wait for soft reset
		 * to fix it.
		 */
	while (qm_check_dev_error(pf_qm)) {
		msleep(++delay);
		if (delay > QM_RESET_WAIT_TIMEOUT)
		if (qm_check_dev_error(pf_qm)) {
			qm_reset_bit_clear(qm);
			if (delay > QM_RESET_WAIT_TIMEOUT) {
				pci_err(pdev, "the hardware error was not recovered!\n");
				return;
			}

	ret = qm_reset_prepare_ready(qm);
	if (ret) {
		pci_err(pdev, "FLR not ready!\n");
		return;
			msleep(++delay);
		} else {
			break;
		}
	}

	/* PF obtains the information of VF by querying the register. */
@@ -4775,7 +4817,7 @@ static irqreturn_t qm_abnormal_irq(int irq, void *data)
	atomic64_inc(&qm->debug.dfx.abnormal_irq_cnt);
	ret = qm_process_dev_error(qm);
	if (ret == ACC_ERR_NEED_RESET &&
	    !test_bit(QM_DRIVER_REMOVING, &qm->misc_ctl) &&
	    !test_bit(QM_DRIVER_DOWN, &qm->misc_ctl) &&
	    !test_and_set_bit(QM_RST_SCHED, &qm->misc_ctl))
		schedule_work(&qm->rst_work);

+20 −4
Original line number Diff line number Diff line
@@ -1012,11 +1012,25 @@ static u32 sec_get_hw_err_status(struct hisi_qm *qm)

/*
 * Write @err_sts to the SEC error source register (SEC_CORE_INT_SOURCE),
 * then rewrite SEC_RAS_NFE_REG with the NFE mask that sec_basic_info
 * provides for qm->cap_ver.
 *
 * @qm: QM device; registers are addressed relative to qm->io_base.
 * @err_sts: error bits written to the source register.
 *
 * NOTE(review): this listing is a diff whose +/- markers were stripped; the
 * NFE lines below may belong to the removed side of the patch -- confirm
 * against the tree before relying on them.
 */
static void sec_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
{
	u32 nfe;

	writel(err_sts, qm->io_base + SEC_CORE_INT_SOURCE);
	/* Re-program NFE reporting with the mask from the capability table. */
	nfe = hisi_qm_get_hw_info(qm, sec_basic_info, SEC_NFE_MASK_CAP, qm->cap_ver);
	writel(nfe, qm->io_base + SEC_RAS_NFE_REG);
}

/*
 * Stop NFE reporting for the error types set in @err_type.
 *
 * The NFE mask for qm->cap_ver is looked up in sec_basic_info, the bits
 * in @err_type are cleared from it, and the result is written to
 * SEC_RAS_NFE_REG.
 */
static void sec_disable_error_report(struct hisi_qm *qm, u32 err_type)
{
	u32 supported = hisi_qm_get_hw_info(qm, sec_basic_info,
					    SEC_NFE_MASK_CAP, qm->cap_ver);
	u32 enabled = supported & ~err_type;

	writel(enabled, qm->io_base + SEC_RAS_NFE_REG);
}

/*
 * Program NFE and CE reporting with the masks that sec_basic_info
 * provides for qm->cap_ver: NFE goes to SEC_RAS_NFE_REG, CE to
 * SEC_RAS_CE_REG.
 */
static void sec_enable_error_report(struct hisi_qm *qm)
{
	u32 nfe = hisi_qm_get_hw_info(qm, sec_basic_info,
				      SEC_NFE_MASK_CAP, qm->cap_ver);
	u32 ce = hisi_qm_get_hw_info(qm, sec_basic_info,
				     SEC_CE_MASK_CAP, qm->cap_ver);

	writel(nfe, qm->io_base + SEC_RAS_NFE_REG);
	writel(ce, qm->io_base + SEC_RAS_CE_REG);
}

static void sec_open_axi_master_ooo(struct hisi_qm *qm)
@@ -1054,6 +1068,8 @@ static const struct hisi_qm_err_ini sec_err_ini = {
	.hw_err_disable		= sec_hw_error_disable,
	.get_dev_hw_err_status	= sec_get_hw_err_status,
	.clear_dev_hw_err_status = sec_clear_hw_err_status,
	.disable_error_report   = sec_disable_error_report,
	.enable_error_report    = sec_enable_error_report,
	.log_dev_hw_err		= sec_log_hw_error,
	.open_axi_master_ooo	= sec_open_axi_master_ooo,
	.open_sva_prefetch	= sec_open_sva_prefetch,
+20 −4
Original line number Diff line number Diff line
@@ -1008,11 +1008,25 @@ static u32 hisi_zip_get_hw_err_status(struct hisi_qm *qm)

/*
 * Write @err_sts to the ZIP error source register (HZIP_CORE_INT_SOURCE),
 * then rewrite HZIP_CORE_INT_RAS_NFE_ENB with the NFE mask that
 * zip_basic_cap_info provides for qm->cap_ver.
 *
 * @qm: QM device; registers are addressed relative to qm->io_base.
 * @err_sts: error bits written to the source register.
 *
 * NOTE(review): this listing is a diff whose +/- markers were stripped; the
 * NFE lines below may belong to the removed side of the patch -- confirm
 * against the tree before relying on them.
 */
static void hisi_zip_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
{
	u32 nfe;

	writel(err_sts, qm->io_base + HZIP_CORE_INT_SOURCE);
	/* Re-program NFE reporting with the mask from the capability table. */
	nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info, ZIP_NFE_MASK_CAP, qm->cap_ver);
	writel(nfe, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
}

/*
 * Stop NFE reporting for the error types set in @err_type.
 *
 * The NFE mask for qm->cap_ver is looked up in zip_basic_cap_info, the
 * bits in @err_type are cleared from it, and the result is written to
 * HZIP_CORE_INT_RAS_NFE_ENB.
 */
static void hisi_zip_disable_error_report(struct hisi_qm *qm, u32 err_type)
{
	u32 supported = hisi_qm_get_hw_info(qm, zip_basic_cap_info,
					    ZIP_NFE_MASK_CAP, qm->cap_ver);
	u32 enabled = supported & ~err_type;

	writel(enabled, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
}

/*
 * Program NFE and CE reporting with the masks that zip_basic_cap_info
 * provides for qm->cap_ver: NFE goes to HZIP_CORE_INT_RAS_NFE_ENB, CE to
 * HZIP_CORE_INT_RAS_CE_ENB.
 */
static void hisi_zip_enable_error_report(struct hisi_qm *qm)
{
	u32 nfe = hisi_qm_get_hw_info(qm, zip_basic_cap_info,
				      ZIP_NFE_MASK_CAP, qm->cap_ver);
	u32 ce = hisi_qm_get_hw_info(qm, zip_basic_cap_info,
				     ZIP_CE_MASK_CAP, qm->cap_ver);

	writel(nfe, qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
	writel(ce, qm->io_base + HZIP_CORE_INT_RAS_CE_ENB);
}

static void hisi_zip_open_axi_master_ooo(struct hisi_qm *qm)
@@ -1069,6 +1083,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
	.hw_err_disable		= hisi_zip_hw_error_disable,
	.get_dev_hw_err_status	= hisi_zip_get_hw_err_status,
	.clear_dev_hw_err_status = hisi_zip_clear_hw_err_status,
	.disable_error_report   = hisi_zip_disable_error_report,
	.enable_error_report    = hisi_zip_enable_error_report,
	.log_dev_hw_err		= hisi_zip_log_hw_error,
	.open_axi_master_ooo	= hisi_zip_open_axi_master_ooo,
	.close_axi_master_ooo	= hisi_zip_close_axi_master_ooo,
+4 −0
Original line number Diff line number Diff line
@@ -252,6 +252,8 @@ struct hisi_qm_err_info {
	u32 ce;
	u32 nfe;
	u32 fe;
	u32 qm_err_type;
	u32 dev_err_type;
};

struct hisi_qm_err_status {
@@ -265,6 +267,8 @@ struct hisi_qm_err_ini {
	void (*hw_err_disable)(struct hisi_qm *qm);
	u32 (*get_dev_hw_err_status)(struct hisi_qm *qm);
	void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
	void (*disable_error_report)(struct hisi_qm *qm, u32 err_type);
	void (*enable_error_report)(struct hisi_qm *qm);
	void (*open_axi_master_ooo)(struct hisi_qm *qm);
	void (*close_axi_master_ooo)(struct hisi_qm *qm);
	void (*open_sva_prefetch)(struct hisi_qm *qm);