Commit c83ae452 authored by David S. Miller's avatar David S. Miller
Browse files
Tony Nguyen says:

====================
ice: support dynamic interrupt allocation

Piotr Raczynski says:

This patchset reimplements MSIX interrupt allocation logic to allow dynamic
interrupt allocation after MSIX has been initially enabled. This allows
current and future features to allocate and free interrupts as needed and
will help to drastically decrease number of initially preallocated
interrupts (even down to the API hard limit of 1). Although this patchset
does not change behavior in terms of actual number of allocated interrupts
during probe, it will be subject to change.

First few patches prepares to introduce dynamic allocation by moving
interrupt allocation code to separate file and update allocation API used
in the driver to the currently preferred one.

Due to the current contract between ice and irdma driver which is directly
accessing msix entries allocated by ice driver, even after moving away from
older pci_enable_msix_range function, still keep msix_entries array for
irdma use.

Next patches refactors and removes redundant code from SRIOV related logic
as it also make it easier to move away from static allocation scheme.

Last patches actually enables dynamic allocation of MSIX interrupts. First,
introduce functions to allocate and free interrupts individually. This sets
ground for the rest of the changes even if that patch still allocates the
interrupts from the preallocated pool. Since this patch starts to keep
interrupt details in ice_q_vector structure we can get rid of functions
that calculates base vector number and register offset for the interrupt
as it is equal to the interrupt index. Only keep separate register offset
functions for the VF VSIs.

Next, replace homegrown interrupt tracker with much simpler xarray based
approach. As new API always allocate interrupts one by one, also track
interrupts in the same manner.

Lastly, extend the interrupt tracker to deal both with preallocated and
dynamically allocated vectors and use pci_msix_alloc_irq_at and
pci_msix_free_irq functions. Since not all architecture supports dynamic
allocation, check it before trying to allocate a new interrupt.

As previously mentioned, this patchset does not change number of initially
allocated interrupts during init phase but now it can and will likely be
changed.

Patch 1-3 -> move code around and use newer API
Patch 4-5 -> refactor and remove redundant SRIOV code
Patch 6   -> allocate every interrupt individually
Patch 7   -> replace homegrown interrupt tracker with xarray
Patch 8   -> allow dynamic interrupt allocation
---
v2:
Patch 4
 - simplify ice_vsi_setup_vector_base and account for num_avail_sw_msix
Patch 8
 - prevent q_vector leak in case vf ctrl VSI error

v1: https://lore.kernel.org/netdev/20230509170048.2235678-1-anthony.l.nguyen@intel.com/


====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 030d71fd 011670cc
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ ice-y := ice_main.o \
	 ice_txrx_lib.o	\
	 ice_txrx.o	\
	 ice_fltr.o	\
	 ice_irq.o	\
	 ice_pf_vsi_vlan_ops.o \
	 ice_vsi_vlan_ops.o \
	 ice_vsi_vlan_lib.o \
+8 −16
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@
#include "ice_lag.h"
#include "ice_vsi_vlan_ops.h"
#include "ice_gnss.h"
#include "ice_irq.h"

#define ICE_BAR0		0
#define ICE_REQ_DESC_MULTIPLE	32
@@ -103,11 +104,6 @@
#define ICE_Q_WAIT_RETRY_LIMIT	10
#define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)
#define ICE_MAX_LG_RSS_QS	256
#define ICE_RES_VALID_BIT	0x8000
#define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
#define ICE_RES_RDMA_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_RDMA_VEC_ID - 1)
#define ICE_INVAL_Q_INDEX	0xffff

#define ICE_MAX_RXQS_PER_TC		256	/* Used when setting VSI context per TC Rx queues */
@@ -245,12 +241,6 @@ struct ice_tc_cfg {
	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
};

struct ice_res_tracker {
	u16 num_entries;
	u16 end;
	u16 list[];
};

struct ice_qs_cfg {
	struct mutex *qs_mutex;  /* will be assigned to &pf->avail_q_mutex */
	unsigned long *pf_map;
@@ -348,7 +338,9 @@ struct ice_vsi {
	u32 rx_buf_failed;
	u32 rx_page_failed;
	u16 num_q_vectors;
	u16 base_vector;		/* IRQ base for OS reserved vectors */
	/* tell if only dynamic irq allocation is allowed */
	bool irq_dyn_alloc;

	enum ice_vsi_type type;
	u16 vsi_num;			/* HW (absolute) index of this VSI */
	u16 idx;			/* software index in pf->vsi[] */
@@ -479,6 +471,7 @@ struct ice_q_vector {
	char name[ICE_INT_NAME_STR_LEN];

	u16 total_events;	/* net_dim(): number of interrupts processed */
	struct msi_map irq;
} ____cacheline_internodealigned_in_smp;

enum ice_pf_flags {
@@ -539,7 +532,7 @@ struct ice_pf {

	/* OS reserved IRQ details */
	struct msix_entry *msix_entries;
	struct ice_res_tracker *irq_tracker;
	struct ice_irq_tracker irq_tracker;
	/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
	 * number of MSIX vectors needed for all SR-IOV VFs from the number of
	 * MSIX vectors allowed on this PF.
@@ -583,8 +576,7 @@ struct ice_pf {

	u32 hw_csum_rx_error;
	u32 oicr_err_reg;
	u16 oicr_idx;		/* Other interrupt cause MSIX vector index */
	u16 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
	struct msi_map oicr_irq;	/* Other interrupt cause MSIX vector */
	u16 max_pf_txqs;	/* Total Tx queues PF wide */
	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
	u16 num_lan_msix;	/* Total MSIX vectors for base driver */
@@ -670,7 +662,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
		    struct ice_q_vector *q_vector)
{
	u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
				((struct ice_pf *)hw->back)->oicr_idx;
				((struct ice_pf *)hw->back)->oicr_irq.index;
	int itr = ICE_ITR_NONE;
	u32 val;

+2 −3
Original line number Diff line number Diff line
@@ -596,7 +596,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
{
	struct net_device *netdev;
	struct ice_pf *pf;
	int base_idx, i;
	int i;

	if (!vsi || vsi->type != ICE_VSI_PF)
		return 0;
@@ -613,10 +613,9 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
	if (unlikely(!netdev->rx_cpu_rmap))
		return -EINVAL;

	base_idx = vsi->base_vector;
	ice_for_each_q_vector(vsi, i)
		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
				     pf->msix_entries[base_idx + i].vector)) {
				     vsi->q_vectors[i]->irq.virq)) {
			ice_free_cpu_rx_rmap(vsi);
			return -EINVAL;
		}
+46 −4
Original line number Diff line number Diff line
@@ -103,10 +103,10 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
{
	struct ice_pf *pf = vsi->back;
	struct ice_q_vector *q_vector;
	int err;

	/* allocate q_vector */
	q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector),
				GFP_KERNEL);
	q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

@@ -118,9 +118,34 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
	q_vector->rx.itr_mode = ITR_DYNAMIC;
	q_vector->tx.type = ICE_TX_CONTAINER;
	q_vector->rx.type = ICE_RX_CONTAINER;
	q_vector->irq.index = -ENOENT;

	if (vsi->type == ICE_VSI_VF)
	if (vsi->type == ICE_VSI_VF) {
		q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector);
		goto out;
	} else if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
		struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi);

		if (ctrl_vsi) {
			if (unlikely(!ctrl_vsi->q_vectors)) {
				err = -ENOENT;
				goto err_free_q_vector;
			}

			q_vector->irq = ctrl_vsi->q_vectors[0]->irq;
			goto skip_alloc;
		}
	}

	q_vector->irq = ice_alloc_irq(pf, vsi->irq_dyn_alloc);
	if (q_vector->irq.index < 0) {
		err = -ENOMEM;
		goto err_free_q_vector;
	}

skip_alloc:
	q_vector->reg_idx = q_vector->irq.index;

	/* only set affinity_mask if the CPU is online */
	if (cpu_online(v_idx))
		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
@@ -137,6 +162,11 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
	vsi->q_vectors[v_idx] = q_vector;

	return 0;

err_free_q_vector:
	kfree(q_vector);

	return err;
}

/**
@@ -168,7 +198,19 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
	if (vsi->netdev)
		netif_napi_del(&q_vector->napi);

	devm_kfree(dev, q_vector);
	/* release MSIX interrupt if q_vector had interrupt allocated */
	if (q_vector->irq.index < 0)
		goto free_q_vector;

	/* only free last VF ctrl vsi interrupt */
	if (vsi->type == ICE_VSI_CTRL && vsi->vf &&
	    ice_get_vf_ctrl_vsi(pf, vsi))
		goto free_q_vector;

	ice_free_irq(pf, q_vector->irq);

free_q_vector:
	kfree(q_vector);
	vsi->q_vectors[v_idx] = NULL;
}

+1 −1
Original line number Diff line number Diff line
@@ -956,7 +956,7 @@ static u64 ice_intr_test(struct net_device *netdev)

	netdev_info(netdev, "interrupt test\n");

	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_irq.index),
	     GLINT_DYN_CTL_SW_ITR_INDX_M |
	     GLINT_DYN_CTL_INTENA_MSK_M |
	     GLINT_DYN_CTL_SWINT_TRIG_M);
Loading