Commit 6b5525c8 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'sfc-encap-offloads-on-EF10'



Edward Cree says:

====================
sfc: encap offloads on EF10

EF10 NICs from the 8000 series onwards support TX offloads (checksumming,
 TSO) on VXLAN- and NVGRE-encapsulated packets.  This series adds driver
 support for these offloads.

Changes from v1:
 * Fix 'no TXQ of type' error handling in patch #1 (and clear up the
   misleading comment that inspired the wrong version)
 * Add comment in patch #5 explaining what the deal with TSOv3 is
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4a681bf3 24b2c375
Loading
Loading
Loading
Loading
+85 −39
Original line number Diff line number Diff line
@@ -601,9 +601,13 @@ static int efx_ef10_probe(struct efx_nic *efx)
	efx_ef10_read_licensed_features(efx);

	/* We can have one VI for each vi_stride-byte region.
	 * However, until we use TX option descriptors we need two TX queues
	 * per channel.
	 * However, until we use TX option descriptors we need up to four
	 * TX queues per channel for different checksumming combinations.
	 */
	if (nic_data->datapath_caps &
	    (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
		efx->tx_queues_per_channel = 4;
	else
		efx->tx_queues_per_channel = 2;
	efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride;
	if (!efx->max_vis) {
@@ -1300,6 +1304,7 @@ static void efx_ef10_fini_nic(struct efx_nic *efx)
static int efx_ef10_init_nic(struct efx_nic *efx)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	netdev_features_t hw_enc_features = 0;
	int rc;

	if (nic_data->must_check_datapath_caps) {
@@ -1344,6 +1349,21 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
		nic_data->must_restore_piobufs = false;
	}

	/* add encapsulated checksum offload features */
	if (efx_has_cap(efx, VXLAN_NVGRE) && !efx_ef10_is_vf(efx))
		hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	/* add encapsulated TSO features */
	if (efx_has_cap(efx, TX_TSO_V2_ENCAP)) {
		netdev_features_t encap_tso_features;

		encap_tso_features = NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
			NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM;

		hw_enc_features |= encap_tso_features | NETIF_F_TSO;
		efx->net_dev->features |= encap_tso_features;
	}
	efx->net_dev->hw_enc_features = hw_enc_features;

	/* don't fail init if RSS setup doesn't work */
	rc = efx->type->rx_push_rss_config(efx, false,
					   efx->rss_context.rx_indir_table, NULL);
@@ -2146,6 +2166,9 @@ static int efx_ef10_irq_test_generate(struct efx_nic *efx)

static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue)
{
	/* low two bits of label are what we want for type */
	BUILD_BUG_ON((EFX_TXQ_TYPE_OUTER_CSUM | EFX_TXQ_TYPE_INNER_CSUM) != 3);
	tx_queue->type = tx_queue->label & 3;
	return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf,
				    (tx_queue->ptr_mask + 1) *
				    sizeof(efx_qword_t),
@@ -2168,15 +2191,15 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,

/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
 */
static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
				struct sk_buff *skb,
int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
			 bool *data_mapped)
{
	struct efx_tx_buffer *buffer;
	u16 inner_ipv4_id = 0;
	u16 outer_ipv4_id = 0;
	struct tcphdr *tcp;
	struct iphdr *ip;

	u16 ipv4_id;
	u16 ip_tot_len;
	u32 seqnum;
	u32 mss;

@@ -2189,21 +2212,43 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
		return -EINVAL;
	}

	if (skb->encapsulation) {
		if (!tx_queue->tso_encap)
			return -EINVAL;
		ip = ip_hdr(skb);
		if (ip->version == 4)
			outer_ipv4_id = ntohs(ip->id);

		ip = inner_ip_hdr(skb);
		tcp = inner_tcp_hdr(skb);
	} else {
		ip = ip_hdr(skb);
		tcp = tcp_hdr(skb);
	}

	/* 8000-series EF10 hardware requires that IP Total Length be
	 * greater than or equal to the value it will have in each segment
	 * (which is at most mss + 208 + TCP header length), but also less
	 * than (0x10000 - inner_network_header).  Otherwise the TCP
	 * checksum calculation will be broken for encapsulated packets.
	 * We fill in ip->tot_len with 0xff30, which should satisfy the
	 * first requirement unless the MSS is ridiculously large (which
	 * should be impossible as the driver max MTU is 9216); it is
	 * guaranteed to satisfy the second as we only attempt TSO if
	 * inner_network_header <= 208.
	 */
	ip_tot_len = -EFX_TSO2_MAX_HDRLEN;
	EFX_WARN_ON_ONCE_PARANOID(mss + EFX_TSO2_MAX_HDRLEN +
				  (tcp->doff << 2u) > ip_tot_len);

	if (ip->version == 4) {
		/* Modify IPv4 header if needed. */
		ip->tot_len = 0;
		ip->tot_len = htons(ip_tot_len);
		ip->check = 0;
		ipv4_id = ntohs(ip->id);
		inner_ipv4_id = ntohs(ip->id);
	} else {
		/* Modify IPv6 header if needed. */
		struct ipv6hdr *ipv6 = ipv6_hdr(skb);

		ipv6->payload_len = 0;
		ipv4_id = 0;
		((struct ipv6hdr *)ip)->payload_len = htons(ip_tot_len);
	}

	tcp = tcp_hdr(skb);
	seqnum = ntohl(tcp->seq);

	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
@@ -2216,7 +2261,7 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
			ESF_DZ_TX_TSO_OPTION_TYPE,
			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
			ESF_DZ_TX_TSO_IP_ID, ipv4_id,
			ESF_DZ_TX_TSO_IP_ID, inner_ipv4_id,
			ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
			);
	++tx_queue->insert_count;
@@ -2226,11 +2271,12 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
	buffer->flags = EFX_TX_BUF_OPTION;
	buffer->len = 0;
	buffer->unmap_len = 0;
	EFX_POPULATE_QWORD_4(buffer->option,
	EFX_POPULATE_QWORD_5(buffer->option,
			ESF_DZ_TX_DESC_IS_OPT, 1,
			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
			ESF_DZ_TX_TSO_OPTION_TYPE,
			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
			ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id,
			ESF_DZ_TX_TSO_TCP_MSS, mss
			);
	++tx_queue->insert_count;
@@ -2254,11 +2300,11 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)

static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
{
	bool csum_offload = tx_queue->label & EFX_TXQ_TYPE_OFFLOAD;
	bool csum_offload = tx_queue->type & EFX_TXQ_TYPE_OUTER_CSUM;
	bool inner_csum = tx_queue->type & EFX_TXQ_TYPE_INNER_CSUM;
	struct efx_channel *channel = tx_queue->channel;
	struct efx_nic *efx = tx_queue->efx;
	struct efx_ef10_nic_data *nic_data;
	bool tso_v2 = false;
	efx_qword_t *txd;
	int rc;

@@ -2281,15 +2327,18 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
	 * TSOv2 cannot be used with Hardware timestamping, and is never needed
	 * for XDP tx.
	 */
	if (csum_offload && (nic_data->datapath_caps2 &
			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
	if (efx_has_cap(efx, TX_TSO_V2)) {
		if ((csum_offload || inner_csum) &&
		    !tx_queue->timestamping && !tx_queue->xdp_tx) {
		tso_v2 = true;
			tx_queue->tso_version = 2;
			netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
				  channel->channel);
		}
	} else if (efx_has_cap(efx, TX_TSO)) {
		tx_queue->tso_version = 1;
	}

	rc = efx_mcdi_tx_init(tx_queue, tso_v2);
	rc = efx_mcdi_tx_init(tx_queue);
	if (rc)
		goto fail;

@@ -2302,22 +2351,19 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
	tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
	tx_queue->insert_count = 1;
	txd = efx_tx_desc(tx_queue, 0);
	EFX_POPULATE_QWORD_5(*txd,
	EFX_POPULATE_QWORD_7(*txd,
			     ESF_DZ_TX_DESC_IS_OPT, true,
			     ESF_DZ_TX_OPTION_TYPE,
			     ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
			     ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload,
			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload && tx_queue->tso_version != 2,
			     ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM, inner_csum,
			     ESF_DZ_TX_OPTION_INNER_IP_CSUM, inner_csum && tx_queue->tso_version != 2,
			     ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
	tx_queue->write_count = 1;

	if (tso_v2) {
		tx_queue->handle_tso = efx_ef10_tx_tso_desc;
		tx_queue->tso_version = 2;
	} else if (nic_data->datapath_caps &
			(1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
		tx_queue->tso_version = 1;
	}
	if (tx_queue->tso_version == 2 && efx_has_cap(efx, TX_TSO_V2_ENCAP))
		tx_queue->tso_encap = true;

	wmb();
	efx_ef10_push_tx_desc(tx_queue, txd);
@@ -2880,7 +2926,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
	/* Get the transmit queue */
	tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
	tx_queue = efx_channel_get_tx_queue(channel,
					    tx_ev_q_label % EFX_TXQ_TYPES);
					    tx_ev_q_label % EFX_MAX_TXQ_PER_CHANNEL);

	if (!tx_queue->timestamping) {
		/* Transmit completion */
+8 −1
Original line number Diff line number Diff line
@@ -37,7 +37,14 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue)
				    tx_queue->channel->channel -
				    tx_queue->efx->tx_channel_offset);

	if (efx_mcdi_tx_init(tx_queue, false))
	/* This value is purely documentational; as EF100 never passes through
	 * the switch statement in tx.c:__efx_enqueue_skb(), that switch does
	 * not handle case 3.  EF100's TSOv3 descriptors are generated by
	 * ef100_make_tso_desc().
	 * Meanwhile, all efx_mcdi_tx_init() cares about is that it's not 2.
	 */
	tx_queue->tso_version = 3;
	if (efx_mcdi_tx_init(tx_queue))
		netdev_WARN(tx_queue->efx->net_dev,
			    "failed to initialise TXQ %d\n", tx_queue->queue);
}
+1 −0
Original line number Diff line number Diff line
@@ -596,6 +596,7 @@ static const struct net_device_ops efx_netdev_ops = {
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
	.ndo_features_check	= efx_features_check,
	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
#ifdef CONFIG_SFC_SRIOV
+5 −5
Original line number Diff line number Diff line
@@ -151,7 +151,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
	 */

	n_xdp_tx = num_possible_cpus();
	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_MAX_TXQ_PER_CHANNEL);

	vec_count = pci_msix_vec_count(efx->pci_dev);
	if (vec_count < 0)
@@ -179,7 +179,7 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
		efx->xdp_tx_queue_count = 0;
	} else {
		efx->n_xdp_channels = n_xdp_ev;
		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
		efx->xdp_tx_per_channel = EFX_MAX_TXQ_PER_CHANNEL;
		efx->xdp_tx_queue_count = n_xdp_tx;
		n_channels += n_xdp_ev;
		netif_dbg(efx, drv, efx->net_dev,
@@ -520,7 +520,7 @@ static struct efx_channel *efx_alloc_channel(struct efx_nic *efx, int i)
	channel->channel = i;
	channel->type = &efx_default_channel_type;

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
		tx_queue = &channel->tx_queue[j];
		tx_queue->efx = efx;
		tx_queue->queue = -1;
@@ -594,7 +594,7 @@ struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
	channel->napi_str.state = 0;
	memset(&channel->eventq, 0, sizeof(channel->eventq));

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
	for (j = 0; j < EFX_MAX_TXQ_PER_CHANNEL; j++) {
		tx_queue = &channel->tx_queue[j];
		if (tx_queue->channel)
			tx_queue->channel = channel;
@@ -894,7 +894,7 @@ int efx_set_channels(struct efx_nic *efx)
						  xdp_queue_number, tx_queue->queue);
					/* We may have a few left-over XDP TX
					 * queues owing to xdp_tx_queue_count
					 * not dividing evenly by EFX_TXQ_TYPES.
					 * not dividing evenly by EFX_MAX_TXQ_PER_CHANNEL.
					 * We still allocate and probe those
					 * TXQs, but never use them.
					 */
+84 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/gre.h>
#include "efx_common.h"
#include "efx_channels.h"
#include "efx.h"
@@ -1287,6 +1288,89 @@ const struct pci_error_handlers efx_err_handlers = {
	.resume		= efx_io_resume,
};

/* Determine whether the NIC will be able to handle TX offloads for a given
 * encapsulated packet.
 */
static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
{
	struct gre_base_hdr *greh;
	__be16 dst_port;
	u8 ipproto;

	/* Does the NIC support encap offloads?
	 * If not, we should never get here, because we shouldn't have
	 * advertised encap offload feature flags in the first place.
	 */
	if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
		return false;

	/* Determine encapsulation protocol in use */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ipproto = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		/* If there are extension headers, this will cause us to
		 * think we can't offload something that we maybe could have.
		 */
		ipproto = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		/* Not IP, so can't offload it */
		return false;
	}
	switch (ipproto) {
	case IPPROTO_GRE:
		/* We support NVGRE but not IP over GRE or random gretaps.
		 * Specifically, the NIC will accept GRE as encapsulated if
		 * the inner protocol is Ethernet, but only handle it
		 * correctly if the GRE header is 8 bytes long.  Moreover,
		 * it will not update the Checksum or Sequence Number fields
		 * if they are present.  (The Routing Present flag,
		 * GRE_ROUTING, cannot be set else the header would be more
		 * than 8 bytes long; so we don't have to worry about it.)
		 */
		if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
			return false;
		if (ntohs(skb->inner_protocol) != ETH_P_TEB)
			return false;
		if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
			return false;
		greh = (struct gre_base_hdr *)skb_transport_header(skb);
		return !(greh->flags & (GRE_CSUM | GRE_SEQ));
	case IPPROTO_UDP:
		/* If the port is registered for a UDP tunnel, we assume the
		 * packet is for that tunnel, and the NIC will handle it as
		 * such.  If not, the NIC won't know what to do with it.
		 */
		dst_port = udp_hdr(skb)->dest;
		return efx->type->udp_tnl_has_port(efx, dst_port);
	default:
		return false;
	}
}

netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
				     netdev_features_t features)
{
	struct efx_nic *efx = netdev_priv(dev);

	if (skb->encapsulation) {
		if (features & NETIF_F_GSO_MASK)
			/* Hardware can only do TSO with at most 208 bytes
			 * of headers.
			 */
			if (skb_inner_transport_offset(skb) >
			    EFX_TSO2_MAX_HDRLEN)
				features &= ~(NETIF_F_GSO_MASK);
		if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK))
			if (!efx_can_encap_offloads(efx, skb))
				features &= ~(NETIF_F_GSO_MASK |
					      NETIF_F_CSUM_MASK);
	}
	return features;
}

int efx_get_phys_port_id(struct net_device *net_dev,
			 struct netdev_phys_item_id *ppid)
{
Loading