Commit bcbf1be0 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'udp_tunnel-convert-Intel-drivers-with-shared-tables'



Jakub Kicinski says:

====================
udp_tunnel: convert Intel drivers with shared tables

This set converts Intel drivers which have the ability to spawn
multiple netdevs, but have only one UDP tunnel port table.

Appropriate support is added to the core infra in patch 1,
followed by netdevsim support and a selftest.

The table sharing works by core attaching the same table
structure to all devices sharing the table. This means the
reference count has to accommodate potentially large values.

Once core is ready i40e and ice are converted. These are
complex drivers, but we got a tested-by from Aaron, so we
should be good :)

Compared to v1 I've made sure the selftest is executable.

Other than that patches 8 and 9 are actually from the Mellanox
conversion series were kept out to avoid Mellanox vs Intel
conflicts.

Last patch is new, some docs to let users knows ethtool
can now display UDP tunnel info.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 8744c0a8 33a1aaf8
Loading
Loading
Loading
Loading
+28 −0
Original line number Diff line number Diff line
@@ -58,3 +58,31 @@ forwarding table using the new bridge command.
3. Show forwarding table::

    # bridge fdb show dev vxlan0

The following NIC features may indicate support for UDP tunnel-related
offloads (most commonly VXLAN features, but support for a particular
encapsulation protocol is NIC specific):

 - `tx-udp_tnl-segmentation`
 - `tx-udp_tnl-csum-segmentation`
    ability to perform TCP segmentation offload of UDP encapsulated frames

 - `rx-udp_tunnel-port-offload`
    receive side parsing of UDP encapsulated frames which allows NICs to
    perform protocol-aware offloads, like checksum validation offload of
    inner frames (only needed by NICs without protocol-agnostic offloads)

For devices supporting `rx-udp_tunnel-port-offload` the list of currently
offloaded ports can be interrogated with `ethtool`::

  $ ethtool --show-tunnels eth0
  Tunnel information for eth0:
    UDP port table 0:
      Size: 4
      Types: vxlan
      No entries
    UDP port table 1:
      Size: 4
      Types: geneve, vxlan-gpe
      Entries (1):
          port 1230, vxlan-gpe
+3 −3
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
#include <net/udp_tunnel.h>
#include <net/xdp_sock.h>
#include "i40e_type.h"
#include "i40e_prototype.h"
@@ -133,7 +134,6 @@ enum i40e_state_t {
	__I40E_PORT_SUSPENDED,
	__I40E_VF_DISABLE,
	__I40E_MACVLAN_SYNC_PENDING,
	__I40E_UDP_FILTER_SYNC_PENDING,
	__I40E_TEMP_LINK_POLLING,
	__I40E_CLIENT_SERVICE_REQUESTED,
	__I40E_CLIENT_L2_CHANGE,
@@ -478,8 +478,8 @@ struct i40e_pf {
	struct list_head l3_flex_pit_list;
	struct list_head l4_flex_pit_list;

	struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
	u16 pending_udp_bitmap;
	struct udp_tunnel_nic_shared udp_tunnel_shared;
	struct udp_tunnel_nic_info udp_tunnel_nic;

	struct hlist_head cloud_filter_list;
	u16 num_cloud_filters;
+48 −216
Original line number Diff line number Diff line
@@ -10386,106 +10386,6 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
	i40e_flush(hw);
}

static const char *i40e_tunnel_name(u8 type)
{
	switch (type) {
	case UDP_TUNNEL_TYPE_VXLAN:
		return "vxlan";
	case UDP_TUNNEL_TYPE_GENEVE:
		return "geneve";
	default:
		return "unknown";
	}
}

/**
 * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters
 * @pf: board private structure
 **/
static void i40e_sync_udp_filters(struct i40e_pf *pf)
{
	int i;

	/* loop through and set pending bit for all active UDP filters */
	for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
		if (pf->udp_ports[i].port)
			pf->pending_udp_bitmap |= BIT_ULL(i);
	}

	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
}

/**
 * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW
 * @pf: board private structure
 **/
static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
{
	struct i40e_hw *hw = &pf->hw;
	u8 filter_index, type;
	u16 port;
	int i;

	if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
		return;

	/* acquire RTNL to maintain state of flags and port requests */
	rtnl_lock();

	for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
		if (pf->pending_udp_bitmap & BIT_ULL(i)) {
			struct i40e_udp_port_config *udp_port;
			i40e_status ret = 0;

			udp_port = &pf->udp_ports[i];
			pf->pending_udp_bitmap &= ~BIT_ULL(i);

			port = READ_ONCE(udp_port->port);
			type = READ_ONCE(udp_port->type);
			filter_index = READ_ONCE(udp_port->filter_index);

			/* release RTNL while we wait on AQ command */
			rtnl_unlock();

			if (port)
				ret = i40e_aq_add_udp_tunnel(hw, port,
							     type,
							     &filter_index,
							     NULL);
			else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED)
				ret = i40e_aq_del_udp_tunnel(hw, filter_index,
							     NULL);

			/* reacquire RTNL so we can update filter_index */
			rtnl_lock();

			if (ret) {
				dev_info(&pf->pdev->dev,
					 "%s %s port %d, index %d failed, err %s aq_err %s\n",
					 i40e_tunnel_name(type),
					 port ? "add" : "delete",
					 port,
					 filter_index,
					 i40e_stat_str(&pf->hw, ret),
					 i40e_aq_str(&pf->hw,
						     pf->hw.aq.asq_last_status));
				if (port) {
					/* failed to add, just reset port,
					 * drop pending bit for any deletion
					 */
					udp_port->port = 0;
					pf->pending_udp_bitmap &= ~BIT_ULL(i);
				}
			} else if (port) {
				/* record filter index on success */
				udp_port->filter_index = filter_index;
			}
		}
	}

	rtnl_unlock();
}

/**
 * i40e_service_task - Run the driver's async subtasks
 * @work: pointer to work_struct containing our data
@@ -10525,7 +10425,6 @@ static void i40e_service_task(struct work_struct *work)
								pf->vsi[pf->lan_vsi]);
		}
		i40e_sync_filters_subtask(pf);
		i40e_sync_udp_filters_subtask(pf);
	} else {
		i40e_reset_subtask(pf);
	}
@@ -12225,131 +12124,48 @@ static int i40e_set_features(struct net_device *netdev,
	return 0;
}

/**
 * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port
 * @pf: board private structure
 * @port: The UDP port to look up
 *
 * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found
 **/
static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
{
	u8 i;

	for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
		/* Do not report ports with pending deletions as
		 * being available.
		 */
		if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
			continue;
		if (pf->udp_ports[i].port == port)
			return i;
	}

	return i;
}

/**
 * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up
 * @netdev: This physical port's netdev
 * @ti: Tunnel endpoint information
 **/
static void i40e_udp_tunnel_add(struct net_device *netdev,
static int i40e_udp_tunnel_set_port(struct net_device *netdev,
				    unsigned int table, unsigned int idx,
				    struct udp_tunnel_info *ti)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_pf *pf = vsi->back;
	u16 port = ntohs(ti->port);
	u8 next_idx;
	u8 idx;

	idx = i40e_get_udp_port_idx(pf, port);

	/* Check if port already exists */
	if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
		netdev_info(netdev, "port %d already offloaded\n", port);
		return;
	}

	/* Now check if there is space to add the new port */
	next_idx = i40e_get_udp_port_idx(pf, 0);
	struct i40e_hw *hw = &np->vsi->back->hw;
	u8 type, filter_index;
	i40e_status ret;

	if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
		netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n",
			    port);
		return;
	}
	type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
						   I40E_AQC_TUNNEL_TYPE_NGE;

	switch (ti->type) {
	case UDP_TUNNEL_TYPE_VXLAN:
		pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
		break;
	case UDP_TUNNEL_TYPE_GENEVE:
		if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE))
			return;
		pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE;
		break;
	default:
		return;
	ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
				     NULL);
	if (ret) {
		netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n",
			    i40e_stat_str(hw, ret),
			    i40e_aq_str(hw, hw->aq.asq_last_status));
		return -EIO;
	}

	/* New port: add it and mark its index in the bitmap */
	pf->udp_ports[next_idx].port = port;
	pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED;
	pf->pending_udp_bitmap |= BIT_ULL(next_idx);
	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
	udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
	return 0;
}

/**
 * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away
 * @netdev: This physical port's netdev
 * @ti: Tunnel endpoint information
 **/
static void i40e_udp_tunnel_del(struct net_device *netdev,
static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
				      unsigned int table, unsigned int idx,
				      struct udp_tunnel_info *ti)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_pf *pf = vsi->back;
	u16 port = ntohs(ti->port);
	u8 idx;

	idx = i40e_get_udp_port_idx(pf, port);

	/* Check if port already exists */
	if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS)
		goto not_found;
	struct i40e_hw *hw = &np->vsi->back->hw;
	i40e_status ret;

	switch (ti->type) {
	case UDP_TUNNEL_TYPE_VXLAN:
		if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN)
			goto not_found;
		break;
	case UDP_TUNNEL_TYPE_GENEVE:
		if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE)
			goto not_found;
		break;
	default:
		goto not_found;
	ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
	if (ret) {
		netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n",
			    i40e_stat_str(hw, ret),
			    i40e_aq_str(hw, hw->aq.asq_last_status));
		return -EIO;
	}

	/* if port exists, set it to 0 (mark for deletion)
	 * and make it pending
	 */
	pf->udp_ports[idx].port = 0;

	/* Toggle pending bit instead of setting it. This way if we are
	 * deleting a port that has yet to be added we just clear the pending
	 * bit and don't have to worry about it.
	 */
	pf->pending_udp_bitmap ^= BIT_ULL(idx);
	set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);

	return;
not_found:
	netdev_warn(netdev, "UDP port %d was not found, not deleting\n",
		    port);
	return 0;
}

static int i40e_get_phys_port_id(struct net_device *netdev,
@@ -12955,8 +12771,8 @@ static const struct net_device_ops i40e_netdev_ops = {
	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
	.ndo_set_vf_spoofchk	= i40e_ndo_set_vf_spoofchk,
	.ndo_set_vf_trust	= i40e_ndo_set_vf_trust,
	.ndo_udp_tunnel_add	= i40e_udp_tunnel_add,
	.ndo_udp_tunnel_del	= i40e_udp_tunnel_del,
	.ndo_udp_tunnel_add	= udp_tunnel_nic_add_port,
	.ndo_udp_tunnel_del	= udp_tunnel_nic_del_port,
	.ndo_get_phys_port_id	= i40e_get_phys_port_id,
	.ndo_fdb_add		= i40e_ndo_fdb_add,
	.ndo_features_check	= i40e_features_check,
@@ -13020,6 +12836,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
	if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
		netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;

	netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;

	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;

	netdev->hw_enc_features |= hw_enc_features;
@@ -14420,7 +14238,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
	i40e_ptp_init(pf);

	/* repopulate tunnel port filters */
	i40e_sync_udp_filters(pf);
	udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);

	return ret;
}
@@ -15149,6 +14967,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
	if (err)
		goto err_switch_setup;

	pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
	pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
	pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
	pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
	pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
						    UDP_TUNNEL_TYPE_GENEVE;

	/* The number of VSIs reported by the FW is the minimum guaranteed
	 * to us; HW supports far more and we share the remaining pool with
	 * the other PFs. We allocate space for more than the guarantee with
@@ -15158,6 +14984,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
	else
		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
		dev_warn(&pf->pdev->dev,
			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
	}

	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+2 −4
Original line number Diff line number Diff line
@@ -1268,8 +1268,7 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
		bool is_tun = tun == ICE_FD_HW_SEG_TUN;
		int err;

		if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL,
							&port_num))
		if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num))
			continue;
		err = ice_fdir_write_fltr(pf, input, add, is_tun);
		if (err)
@@ -1647,8 +1646,7 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
	}

	/* return error if not an update and no available filters */
	fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ?
		2 : 1;
	fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1;
	if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
	    ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+1 −1
Original line number Diff line number Diff line
@@ -556,7 +556,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
		memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
		loc = pkt;
	} else {
		if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port))
		if (!ice_get_open_tunnel_port(hw, &tnl_port))
			return ICE_ERR_DOES_NOT_EXIST;
		if (!ice_fdir_pkt[idx].tun_pkt)
			return ICE_ERR_PARAM;
Loading