Commit 1c753652 authored by David S. Miller
Browse files

Merge branch 'hns3-RAS'



Guangbin Huang says:

====================
net: hns3: add RAS compatibility adaptation solution

This patchset adds RAS compatibility adaptation solution for new devices.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents e0eb625a 1c360a4a
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -91,6 +91,7 @@ enum HNAE3_DEV_CAP_BITS {
	HNAE3_DEV_SUPPORT_STASH_B,
	HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B,
	HNAE3_DEV_SUPPORT_PAUSE_B,
	HNAE3_DEV_SUPPORT_RAS_IMP_B,
	HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
	HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B,
	HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B,
@@ -129,6 +130,9 @@ enum HNAE3_DEV_CAP_BITS {
#define hnae3_dev_phy_imp_supported(hdev) \
	test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, (hdev)->ae_dev->caps)

#define hnae3_dev_ras_imp_supported(hdev) \
	test_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, (hdev)->ae_dev->caps)

#define hnae3_dev_tqp_txrx_indep_supported(hdev) \
	test_bit(HNAE3_DEV_SUPPORT_TQP_TXRX_INDEP_B, (hdev)->ae_dev->caps)

+3 −0
Original line number Diff line number Diff line
@@ -349,6 +349,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
	}, {
		.name = "support imp-controlled PHY",
		.cap_bit = HNAE3_DEV_SUPPORT_PHY_IMP_B,
	}, {
		.name = "support imp-controlled RAS",
		.cap_bit = HNAE3_DEV_SUPPORT_RAS_IMP_B,
	}, {
		.name = "support rxd advanced layout",
		.cap_bit = HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B,
+4 −1
Original line number Diff line number Diff line
@@ -178,7 +178,8 @@ static bool hclge_is_special_opcode(u16 opcode)
			     HCLGE_QUERY_CLEAR_MPF_RAS_INT,
			     HCLGE_QUERY_CLEAR_PF_RAS_INT,
			     HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
			     HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT};
			     HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
			     HCLGE_QUERY_ALL_ERR_INFO};
	int i;

	for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) {
@@ -386,6 +387,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev,
		set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps);
	if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B))
		set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps);
	if (hnae3_get_bit(caps, HCLGE_CAP_RAS_IMP_B))
		set_bit(HNAE3_DEV_SUPPORT_RAS_IMP_B, ae_dev->caps);
	if (hnae3_get_bit(caps, HCLGE_CAP_RXD_ADV_LAYOUT_B))
		set_bit(HNAE3_DEV_SUPPORT_RXD_ADV_LAYOUT_B, ae_dev->caps);
	if (hnae3_get_bit(caps, HCLGE_CAP_PORT_VLAN_BYPASS_B)) {
+3 −0
Original line number Diff line number Diff line
@@ -293,6 +293,8 @@ enum hclge_opcode_type {
	HCLGE_QUERY_MSIX_INT_STS_BD_NUM	= 0x1513,
	HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT	= 0x1514,
	HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT	= 0x1515,
	HCLGE_QUERY_ALL_ERR_BD_NUM		= 0x1516,
	HCLGE_QUERY_ALL_ERR_INFO		= 0x1517,
	HCLGE_CONFIG_ROCEE_RAS_INT_EN	= 0x1580,
	HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581,
	HCLGE_ROCEE_PF_RAS_INT_CMD	= 0x1584,
@@ -390,6 +392,7 @@ enum HCLGE_CAP_BITS {
	HCLGE_CAP_HW_PAD_B,
	HCLGE_CAP_STASH_B,
	HCLGE_CAP_UDP_TUNNEL_CSUM_B,
	HCLGE_CAP_RAS_IMP_B = 12,
	HCLGE_CAP_FEC_B = 13,
	HCLGE_CAP_PAUSE_B = 14,
	HCLGE_CAP_RXD_ADV_LAYOUT_B = 15,
+382 −28
Original line number Diff line number Diff line
@@ -631,6 +631,134 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
	{ /* sentinel */ }
};

/* Lookup table mapping hardware module IDs reported by the IMP to
 * printable names, used when logging RAS/MSI-X error information.
 */
static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
	{ .module_id = MODULE_NONE,		.msg = "MODULE_NONE" },
	{ .module_id = MODULE_BIOS_COMMON,	.msg = "MODULE_BIOS_COMMON" },
	{ .module_id = MODULE_GE,		.msg = "MODULE_GE" },
	{ .module_id = MODULE_IGU_EGU,		.msg = "MODULE_IGU_EGU" },
	{ .module_id = MODULE_LGE,		.msg = "MODULE_LGE" },
	{ .module_id = MODULE_NCSI,		.msg = "MODULE_NCSI" },
	{ .module_id = MODULE_PPP,		.msg = "MODULE_PPP" },
	{ .module_id = MODULE_QCN,		.msg = "MODULE_QCN" },
	{ .module_id = MODULE_RCB_RX,		.msg = "MODULE_RCB_RX" },
	{ .module_id = MODULE_RTC,		.msg = "MODULE_RTC" },
	{ .module_id = MODULE_SSU,		.msg = "MODULE_SSU" },
	{ .module_id = MODULE_TM,		.msg = "MODULE_TM" },
	{ .module_id = MODULE_RCB_TX,		.msg = "MODULE_RCB_TX" },
	{ .module_id = MODULE_TXDMA,		.msg = "MODULE_TXDMA" },
	{ .module_id = MODULE_MASTER,		.msg = "MODULE_MASTER" },
	{ .module_id = MODULE_ROCEE_TOP,	.msg = "MODULE_ROCEE_TOP" },
	{ .module_id = MODULE_ROCEE_TIMER,	.msg = "MODULE_ROCEE_TIMER" },
	{ .module_id = MODULE_ROCEE_MDB,	.msg = "MODULE_ROCEE_MDB" },
	{ .module_id = MODULE_ROCEE_TSP,	.msg = "MODULE_ROCEE_TSP" },
	{ .module_id = MODULE_ROCEE_TRP,	.msg = "MODULE_ROCEE_TRP" },
	{ .module_id = MODULE_ROCEE_SCC,	.msg = "MODULE_ROCEE_SCC" },
	{ .module_id = MODULE_ROCEE_CAEP,	.msg = "MODULE_ROCEE_CAEP" },
	{ .module_id = MODULE_ROCEE_GEN_AC,	.msg = "MODULE_ROCEE_GEN_AC" },
	{ .module_id = MODULE_ROCEE_QMM,	.msg = "MODULE_ROCEE_QMM" },
	{ .module_id = MODULE_ROCEE_LSAN,	.msg = "MODULE_ROCEE_LSAN" }
};

/* Lookup table mapping hardware error type IDs reported by the IMP to
 * printable names, used when logging RAS/MSI-X error information.
 */
static const struct hclge_hw_type_id hclge_hw_type_id_st[] = {
	{ .type_id = NONE_ERROR,		.msg = "none_error" },
	{ .type_id = FIFO_ERROR,		.msg = "fifo_error" },
	{ .type_id = MEMORY_ERROR,		.msg = "memory_error" },
	{ .type_id = POISON_ERROR,		.msg = "poison_error" },
	{ .type_id = MSIX_ECC_ERROR,		.msg = "msix_ecc_error" },
	{ .type_id = TQP_INT_ECC_ERROR,		.msg = "tqp_int_ecc_error" },
	{ .type_id = PF_ABNORMAL_INT_ERROR,	.msg = "pf_abnormal_int_error" },
	{ .type_id = MPF_ABNORMAL_INT_ERROR,	.msg = "mpf_abnormal_int_error" },
	{ .type_id = COMMON_ERROR,		.msg = "common_error" },
	{ .type_id = PORT_ERROR,		.msg = "port_error" },
	{ .type_id = ETS_ERROR,			.msg = "ets_error" },
	{ .type_id = NCSI_ERROR,		.msg = "ncsi_error" },
	{ .type_id = GLB_ERROR,			.msg = "glb_error" },
	{ .type_id = ROCEE_NORMAL_ERR,		.msg = "rocee_normal_error" },
	{ .type_id = ROCEE_OVF_ERR,		.msg = "rocee_ovf_error" }
};

static void hclge_log_error(struct device *dev, char *reg,
			    const struct hclge_hw_error *err,
			    u32 err_sts, unsigned long *reset_requests)
@@ -1611,11 +1739,27 @@ static const struct hclge_hw_blk hw_blk[] = {
	{ /* sentinel */ }
};

/* Enable or disable reporting of all MSI-X errors by toggling the
 * corresponding control bit in the PF "other interrupt" register.
 */
static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable)
{
	u32 val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG);

	if (enable)
		val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);
	else
		val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B);

	hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, val);
}

int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state)
{
	const struct hclge_hw_blk *module = hw_blk;
	int ret = 0;

	hclge_config_all_msix_error(hdev, state);

	while (module->name) {
		if (module->config_err_int) {
			ret = module->config_err_int(hdev, state);
@@ -1876,11 +2020,8 @@ static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
					  unsigned long *reset_requests)
{
	struct hclge_mac_tnl_stats mac_tnl_stats;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	/* query the number of bds for the MSIx int status */
@@ -1903,29 +2044,7 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
	if (ret)
		goto msi_error;

	/* query and clear mac tnl interruptions */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
		goto msi_error;
	}

	status = le32_to_cpu(desc->data[0]);
	if (status) {
		/* When mac tnl interrupt occurs, we record current time and
		 * register status here in a fifo, then clear the status. So
		 * that if link status changes suddenly at some time, we can
		 * query them by debugfs.
		 */
		mac_tnl_stats.time = local_clock();
		mac_tnl_stats.status = status;
		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
		ret = hclge_clear_mac_tnl_int(hdev);
		if (ret)
			dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
	}
	ret = hclge_handle_mac_tnl(hdev);

msi_error:
	kfree(desc);
@@ -1947,10 +2066,43 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}

void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
int hclge_handle_mac_tnl(struct hclge_dev *hdev)
{
#define HCLGE_DESC_NO_DATA_LEN 8
	struct hclge_mac_tnl_stats mac_tnl_stats;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	u32 status;
	int ret;

	/* query and clear mac tnl interruptions */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret);
		return ret;
	}

	status = le32_to_cpu(desc.data[0]);
	if (status) {
		/* When mac tnl interrupt occurs, we record current time and
		 * register status here in a fifo, then clear the status. So
		 * that if link status changes suddenly at some time, we can
		 * query them by debugfs.
		 */
		mac_tnl_stats.time = local_clock();
		mac_tnl_stats.status = status;
		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
		ret = hclge_clear_mac_tnl_int(hdev);
		if (ret)
			dev_err(dev, "failed to clear mac tnl int, ret = %d.\n",
				ret);
	}

	return ret;
}

void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
@@ -1999,3 +2151,205 @@ void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
msi_error:
	kfree(desc);
}

bool hclge_find_error_source(struct hclge_dev *hdev)
{
	u32 msix_src_flag, hw_err_src_flag;

	msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) &
			HCLGE_VECTOR0_REG_MSIX_MASK;

	hw_err_src_flag = hclge_read_dev(&hdev->hw,
					 HCLGE_RAS_PF_OTHER_INT_STS_REG) &
			  HCLGE_RAS_REG_ERR_MASK;

	return msix_src_flag || hw_err_src_flag;
}

/* If an error source is pending, fetch and log the IMP-reported error
 * information for this device.
 */
void hclge_handle_occurred_error(struct hclge_dev *hdev)
{
	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);

	if (!hclge_find_error_source(hdev))
		return;

	hclge_handle_error_info_log(ae_dev);
}

/* Log one (module, type, registers) error record. The module and type
 * IDs are translated to names via the lookup tables above; the raw
 * register dump that follows the record header is printed verbatim.
 * Bit 7 of type_id distinguishes RAS errors from MSI-X errors.
 */
static void
hclge_handle_error_type_reg_log(struct device *dev,
				struct hclge_mod_err_info *mod_info,
				struct hclge_type_reg_err_info *type_reg_info)
{
#define HCLGE_ERR_TYPE_MASK 0x7F
#define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7

	u8 mod_idx = MODULE_NONE;
	u8 type_idx = NONE_ERROR;
	u8 mod_id, type_id, is_ras;
	u8 i;

	mod_id = mod_info->mod_id;
	type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK;
	is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET;

	for (i = 0; i < ARRAY_SIZE(hclge_hw_module_id_st); i++) {
		if (hclge_hw_module_id_st[i].module_id == mod_id) {
			mod_idx = i;
			break;
		}
	}

	for (i = 0; i < ARRAY_SIZE(hclge_hw_type_id_st); i++) {
		if (hclge_hw_type_id_st[i].type_id == type_id) {
			type_idx = i;
			break;
		}
	}

	if (mod_idx != MODULE_NONE && type_idx != NONE_ERROR)
		dev_err(dev,
			"found %s %s, is %s error.\n",
			hclge_hw_module_id_st[mod_idx].msg,
			hclge_hw_type_id_st[type_idx].msg,
			is_ras ? "ras" : "msix");
	else
		dev_err(dev,
			"unknown module[%u] or type[%u].\n", mod_id, type_id);

	dev_err(dev, "reg_value:\n");
	for (i = 0; i < type_reg_info->reg_num; i++)
		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
}

/* Walk the u32 error-info buffer returned by the IMP and log every
 * error record in it. The buffer layout (one u32 per header) is:
 *   [sum header][mod header][type+reg record]...[mod header]...
 * where each type record is followed by reg_num raw register words.
 * A reset type requested in the summary header is accumulated into
 * ae_dev->hw_err_reset_req for the caller's reset logic.
 * NOTE(review): offsets are advanced in lockstep with the casts below;
 * bounds are rechecked before each header read, but the trailing
 * reg_num words of the last record are assumed to fit — TODO confirm
 * firmware guarantees this.
 */
static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
					  const u32 *buf, u32 buf_size)
{
	struct hclge_type_reg_err_info *type_reg_info;
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_mod_err_info *mod_info;
	struct hclge_sum_err_info *sum_info;
	u8 mod_num, err_num, i;
	u32 offset = 0;

	/* First word: summary header (requested reset type + module count) */
	sum_info = (struct hclge_sum_err_info *)&buf[offset++];
	if (sum_info->reset_type &&
	    sum_info->reset_type != HNAE3_NONE_RESET)
		set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req);
	mod_num = sum_info->mod_num;

	while (mod_num--) {
		if (offset >= buf_size) {
			dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n",
				offset, buf_size);
			return;
		}
		/* Per-module header: module id + number of error records */
		mod_info = (struct hclge_mod_err_info *)&buf[offset++];
		err_num = mod_info->err_num;

		for (i = 0; i < err_num; i++) {
			if (offset >= buf_size) {
				dev_err(dev,
					"The offset(%u) exceeds buf size(%u).\n",
					offset, buf_size);
				return;
			}

			/* Record header: error type + register word count */
			type_reg_info = (struct hclge_type_reg_err_info *)
					    &buf[offset++];
			hclge_handle_error_type_reg_log(dev, mod_info,
							type_reg_info);

			/* Skip the raw register words of this record */
			offset += type_reg_info->reg_num;
		}
	}
}

/* Ask the firmware how many command descriptors (BDs) are needed to
 * hold the full error-info report. Stores the count in *bd_num.
 * Returns 0 on success, -EINVAL if firmware reports zero BDs, or a
 * negative error code if the command fails.
 */
static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_QUERY_ALL_ERR_BD_NUM, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret);
		return ret;
	}

	*bd_num = le32_to_cpu(desc.data[0]);
	if (!(*bd_num)) {
		dev_err(dev, "The value of bd_num is 0!\n");
		return -EINVAL;
	}

	return 0;
}

/* Fetch the full error-info report from firmware into the caller's
 * descriptor array (bd_num descriptors long). Returns 0 on success or
 * a negative error code.
 */
static int hclge_query_all_err_info(struct hclge_dev *hdev,
				    struct hclge_desc *desc, u32 bd_num)
{
	int ret;

	hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true);
	ret = hclge_cmd_send(&hdev->hw, desc, bd_num);
	if (ret)
		dev_err(&hdev->pdev->dev,
			"failed to query error info, ret = %d.\n", ret);

	return ret;
}

int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev)
{
	u32 bd_num, desc_len, buf_len, buf_size, i;
	struct hclge_dev *hdev = ae_dev->priv;
	struct hclge_desc *desc;
	__le32 *desc_data;
	u32 *buf;
	int ret;

	ret = hclge_query_all_err_bd_num(hdev, &bd_num);
	if (ret)
		goto out;

	desc_len = bd_num * sizeof(struct hclge_desc);
	desc = kzalloc(desc_len, GFP_KERNEL);
	if (!desc) {
		ret = -ENOMEM;
		goto out;
	}

	ret = hclge_query_all_err_info(hdev, desc, bd_num);
	if (ret)
		goto err_desc;

	buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN;
	buf_size = buf_len / sizeof(u32);

	desc_data = kzalloc(buf_len, GFP_KERNEL);
	if (!desc_data)
		return -ENOMEM;

	buf = kzalloc(buf_len, GFP_KERNEL);
	if (!buf) {
		ret = -ENOMEM;
		goto err_buf_alloc;
	}

	memcpy(desc_data, &desc[0].data[0], buf_len);
	for (i = 0; i < buf_size; i++)
		buf[i] = le32_to_cpu(desc_data[i]);

	hclge_handle_error_module_log(ae_dev, buf, buf_size);
	kfree(buf);

err_buf_alloc:
	kfree(desc_data);
err_desc:
	kfree(desc);
out:
	return ret;
}
Loading