Commit e85fbf53 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'gve-improvements'



Jeroen de Borst says:

====================
gve improvements

This patchset consists of unrelated changes:

A bug fix for an issue that disabled jumbo-frame support, a few code
improvements and minor funcitonal changes and 3 new features:
  Supporting tx|rx-coalesce-usec for DQO
  Suspend/resume/shutdown
  Optional metadata descriptors
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 75df1a24 6081ac20
Loading
Loading
Loading
Loading
+17 −4
Original line number Diff line number Diff line
@@ -229,6 +229,7 @@ struct gve_rx_ring {
/* A TX desc ring entry */
union gve_tx_desc {
	struct gve_tx_pkt_desc pkt; /* first desc for a packet */
	struct gve_tx_mtd_desc mtd; /* optional metadata descriptor */
	struct gve_tx_seg_desc seg; /* subsequent descs for a packet */
};

@@ -441,13 +442,13 @@ struct gve_tx_ring {
 * associated with that irq.
 */
struct gve_notify_block {
	__be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */
	__be32 *irq_db_index; /* pointer to idx into Bar2 */
	char name[IFNAMSIZ + 16]; /* name registered with the kernel */
	struct napi_struct napi; /* kernel napi struct for this block */
	struct gve_priv *priv;
	struct gve_tx_ring *tx; /* tx rings on this block */
	struct gve_rx_ring *rx; /* rx rings on this block */
} ____cacheline_aligned;
};

/* Tracks allowed and current queue settings */
struct gve_queue_config {
@@ -466,6 +467,10 @@ struct gve_options_dqo_rda {
	u16 rx_buff_ring_entries; /* number of rx_buff descriptors */
};

struct gve_irq_db {
	__be32 index;
} ____cacheline_aligned;

struct gve_ptype {
	u8 l3_type;  /* `gve_l3_type` in gve_adminq.h */
	u8 l4_type;  /* `gve_l4_type` in gve_adminq.h */
@@ -492,7 +497,8 @@ struct gve_priv {
	struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */
	struct gve_queue_page_list *qpls; /* array of num qpls */
	struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */
	dma_addr_t ntfy_block_bus;
	struct gve_irq_db *irq_db_indices; /* array of num_ntfy_blks */
	dma_addr_t irq_db_indices_bus;
	struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */
	char mgmt_msix_name[IFNAMSIZ + 16];
	u32 mgmt_msix_idx;
@@ -551,6 +557,8 @@ struct gve_priv {
	u32 page_alloc_fail; /* count of page alloc fails */
	u32 dma_mapping_error; /* count of dma mapping errors */
	u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */
	u32 suspend_cnt; /* count of times suspended */
	u32 resume_cnt; /* count of times resumed */
	struct workqueue_struct *gve_wq;
	struct work_struct service_task;
	struct work_struct stats_report_task;
@@ -567,6 +575,7 @@ struct gve_priv {

	/* Gvnic device link speed from hypervisor. */
	u64 link_speed;
	bool up_before_suspend; /* True if dev was up before suspend */

	struct gve_options_dqo_rda options_dqo_rda;
	struct gve_ptype_lut *ptype_lut_dqo;
@@ -575,6 +584,10 @@ struct gve_priv {
	int data_buffer_size_dqo;

	enum gve_queue_format queue_format;

	/* Interrupt coalescing settings */
	u32 tx_coalesce_usecs;
	u32 rx_coalesce_usecs;
};

enum gve_service_task_flags_bit {
@@ -733,7 +746,7 @@ static inline void gve_clear_report_stats(struct gve_priv *priv)
static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv,
					       struct gve_notify_block *block)
{
	return &priv->db_bar2[be32_to_cpu(block->irq_db_index)];
	return &priv->db_bar2[be32_to_cpu(*block->irq_db_index)];
}

/* Returns the index into ntfy_blocks of the given tx ring's block
+5 −5
Original line number Diff line number Diff line
@@ -462,7 +462,7 @@ int gve_adminq_configure_device_resources(struct gve_priv *priv,
		.num_counters = cpu_to_be32(num_counters),
		.irq_db_addr = cpu_to_be64(db_array_bus_addr),
		.num_irq_dbs = cpu_to_be32(num_ntfy_blks),
		.irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])),
		.irq_db_stride = cpu_to_be32(sizeof(*priv->irq_db_indices)),
		.ntfy_blk_msix_base_idx =
					cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX),
		.queue_format = priv->queue_format,
@@ -738,10 +738,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
	 * is not set to GqiRda, choose the queue format in a priority order:
	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
	 */
	if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
		dev_info(&priv->pdev->dev,
			 "Driver is running with GQI RDA queue format.\n");
	} else if (dev_op_dqo_rda) {
	if (dev_op_dqo_rda) {
		priv->queue_format = GVE_DQO_RDA_FORMAT;
		dev_info(&priv->pdev->dev,
			 "Driver is running with DQO RDA queue format.\n");
@@ -753,6 +750,9 @@ int gve_adminq_describe_device(struct gve_priv *priv)
			 "Driver is running with GQI RDA queue format.\n");
		supported_features_mask =
			be32_to_cpu(dev_op_gqi_rda->supported_features_mask);
	} else if (priv->queue_format == GVE_GQI_RDA_FORMAT) {
		dev_info(&priv->pdev->dev,
			 "Driver is running with GQI RDA queue format.\n");
	} else {
		priv->queue_format = GVE_GQI_QPL_FORMAT;
		if (dev_op_gqi_qpl)
+20 −0
Original line number Diff line number Diff line
@@ -33,6 +33,14 @@ struct gve_tx_pkt_desc {
	__be64	seg_addr;  /* Base address (see note) of this segment */
} __packed;

struct gve_tx_mtd_desc {
	u8      type_flags;     /* type is lower 4 bits, subtype upper  */
	u8      path_state;     /* state is lower 4 bits, hash type upper */
	__be16  reserved0;
	__be32  path_hash;
	__be64  reserved1;
} __packed;

struct gve_tx_seg_desc {
	u8	type_flags;	/* type is lower 4 bits, flags upper	*/
	u8	l3_offset;	/* TSO: 2 byte units to start of IPH	*/
@@ -46,6 +54,7 @@ struct gve_tx_seg_desc {
#define	GVE_TXD_STD		(0x0 << 4) /* Std with Host Address	*/
#define	GVE_TXD_TSO		(0x1 << 4) /* TSO with Host Address	*/
#define	GVE_TXD_SEG		(0x2 << 4) /* Seg with Host Address	*/
#define	GVE_TXD_MTD		(0x3 << 4) /* Metadata			*/

/* GVE Transmit Descriptor Flags for Std Pkts */
#define	GVE_TXF_L4CSUM	BIT(0)	/* Need csum offload */
@@ -54,6 +63,17 @@ struct gve_tx_seg_desc {
/* GVE Transmit Descriptor Flags for TSO Segs */
#define	GVE_TXSF_IPV6	BIT(1)	/* IPv6 TSO */

/* GVE Transmit Descriptor Options for MTD Segs */
#define GVE_MTD_SUBTYPE_PATH		0

#define GVE_MTD_PATH_STATE_DEFAULT	0
#define GVE_MTD_PATH_STATE_TIMEOUT	1
#define GVE_MTD_PATH_STATE_CONGESTION	2
#define GVE_MTD_PATH_STATE_RETRANSMIT	3

#define GVE_MTD_PATH_HASH_NONE         (0x0 << 4)
#define GVE_MTD_PATH_HASH_L4           (0x1 << 4)

/* GVE Receive Packet Descriptor */
/* The start of an ethernet packet comes 2 bytes into the rx buffer.
 * gVNIC adds this padding so that both the DMA and the L3/4 protocol header
+18 −6
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@

#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
#define GVE_RX_IRQ_RATELIMIT_US_DQO 20
#define GVE_MAX_ITR_INTERVAL_DQO (GVE_ITR_INTERVAL_DQO_MASK * 2)

/* Timeout in seconds to wait for a reinjection completion after receiving
 * its corresponding miss completion.
@@ -54,17 +55,17 @@ gve_tx_put_doorbell_dqo(const struct gve_priv *priv,
}

/* Builds register value to write to DQO IRQ doorbell to enable with specified
 * ratelimit.
 * ITR interval.
 */
static inline u32 gve_set_itr_ratelimit_dqo(u32 ratelimit_us)
static inline u32 gve_setup_itr_interval_dqo(u32 interval_us)
{
	u32 result = GVE_ITR_ENABLE_BIT_DQO;

	/* Interval has 2us granularity. */
	ratelimit_us >>= 1;
	interval_us >>= 1;

	ratelimit_us &= GVE_ITR_INTERVAL_DQO_MASK;
	result |= (ratelimit_us << GVE_ITR_INTERVAL_DQO_SHIFT);
	interval_us &= GVE_ITR_INTERVAL_DQO_MASK;
	result |= (interval_us << GVE_ITR_INTERVAL_DQO_SHIFT);

	return result;
}
@@ -73,9 +74,20 @@ static inline void
gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
			   const struct gve_notify_block *block, u32 val)
{
	u32 index = be32_to_cpu(block->irq_db_index);
	u32 index = be32_to_cpu(*block->irq_db_index);

	iowrite32(val, &priv->db_bar2[index]);
}

/* Sets interrupt throttling interval and enables interrupt
 * by writing to IRQ doorbell.
 */
static inline void
gve_set_itr_coalesce_usecs_dqo(struct gve_priv *priv,
			       struct gve_notify_block *block,
			       u32 usecs)
{
	gve_write_irq_doorbell_dqo(priv, block,
				   gve_setup_itr_interval_dqo(usecs));
}
#endif /* _GVE_DQO_H_ */
+73 −9
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#include <linux/rtnetlink.h>
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void gve_get_drvinfo(struct net_device *netdev,
			    struct ethtool_drvinfo *info)
@@ -42,7 +43,7 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
};

static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_bytes[%u]",
	"rx_posted_desc[%u]", "rx_completed_desc[%u]", "rx_consumed_desc[%u]", "rx_bytes[%u]",
	"rx_cont_packet_cnt[%u]", "rx_frag_flip_cnt[%u]", "rx_frag_copy_cnt[%u]",
	"rx_dropped_pkt[%u]", "rx_copybreak_pkt[%u]", "rx_copied_pkt[%u]",
	"rx_queue_drop_cnt[%u]", "rx_no_buffers_posted[%u]",
@@ -50,7 +51,7 @@ static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
};

static const char gve_gstrings_tx_stats[][ETH_GSTRING_LEN] = {
	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_bytes[%u]",
	"tx_posted_desc[%u]", "tx_completed_desc[%u]", "tx_consumed_desc[%u]", "tx_bytes[%u]",
	"tx_wake[%u]", "tx_stop[%u]", "tx_event_counter[%u]",
	"tx_dma_mapping_error[%u]",
};
@@ -139,10 +140,11 @@ static void
gve_get_ethtool_stats(struct net_device *netdev,
		      struct ethtool_stats *stats, u64 *data)
{
	u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,	tmp_rx_buf_alloc_fail,
		tmp_rx_desc_err_dropped_pkt, tmp_tx_pkts, tmp_tx_bytes;
	u64 tmp_rx_pkts, tmp_rx_bytes, tmp_rx_skb_alloc_fail,
		tmp_rx_buf_alloc_fail, tmp_rx_desc_err_dropped_pkt,
		tmp_tx_pkts, tmp_tx_bytes;
	u64 rx_buf_alloc_fail, rx_desc_err_dropped_pkt, rx_pkts,
		rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes;
		rx_skb_alloc_fail, rx_bytes, tx_pkts, tx_bytes, tx_dropped;
	int stats_idx, base_stats_idx, max_stats_idx;
	struct stats *report_stats;
	int *rx_qid_to_stats_idx;
@@ -191,7 +193,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
			rx_desc_err_dropped_pkt += tmp_rx_desc_err_dropped_pkt;
		}
	}
	for (tx_pkts = 0, tx_bytes = 0, ring = 0;
	for (tx_pkts = 0, tx_bytes = 0, tx_dropped = 0, ring = 0;
	     ring < priv->tx_cfg.num_queues; ring++) {
		if (priv->tx) {
			do {
@@ -203,6 +205,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
						       start));
			tx_pkts += tmp_tx_pkts;
			tx_bytes += tmp_tx_bytes;
			tx_dropped += priv->tx[ring].dropped_pkt;
		}
	}

@@ -214,9 +217,7 @@ gve_get_ethtool_stats(struct net_device *netdev,
	/* total rx dropped packets */
	data[i++] = rx_skb_alloc_fail + rx_buf_alloc_fail +
		    rx_desc_err_dropped_pkt;
	/* Skip tx_dropped */
	i++;

	data[i++] = tx_dropped;
	data[i++] = priv->tx_timeo_cnt;
	data[i++] = rx_skb_alloc_fail;
	data[i++] = rx_buf_alloc_fail;
@@ -255,6 +256,7 @@ gve_get_ethtool_stats(struct net_device *netdev,

			data[i++] = rx->fill_cnt;
			data[i++] = rx->cnt;
			data[i++] = rx->fill_cnt - rx->cnt;
			do {
				start =
				  u64_stats_fetch_begin(&priv->rx[ring].statss);
@@ -318,12 +320,14 @@ gve_get_ethtool_stats(struct net_device *netdev,
			if (gve_is_gqi(priv)) {
				data[i++] = tx->req;
				data[i++] = tx->done;
				data[i++] = tx->req - tx->done;
			} else {
				/* DQO doesn't currently support
				 * posted/completed descriptor counts;
				 */
				data[i++] = 0;
				data[i++] = 0;
				data[i++] = tx->dqo_tx.tail - tx->dqo_tx.head;
			}
			do {
				start =
@@ -537,7 +541,65 @@ static int gve_get_link_ksettings(struct net_device *netdev,
	return err;
}

static int gve_get_coalesce(struct net_device *netdev,
			    struct ethtool_coalesce *ec,
			    struct kernel_ethtool_coalesce *kernel_ec,
			    struct netlink_ext_ack *extack)
{
	struct gve_priv *priv = netdev_priv(netdev);

	if (gve_is_gqi(priv))
		return -EOPNOTSUPP;
	ec->tx_coalesce_usecs = priv->tx_coalesce_usecs;
	ec->rx_coalesce_usecs = priv->rx_coalesce_usecs;

	return 0;
}

static int gve_set_coalesce(struct net_device *netdev,
			    struct ethtool_coalesce *ec,
			    struct kernel_ethtool_coalesce *kernel_ec,
			    struct netlink_ext_ack *extack)
{
	struct gve_priv *priv = netdev_priv(netdev);
	u32 tx_usecs_orig = priv->tx_coalesce_usecs;
	u32 rx_usecs_orig = priv->rx_coalesce_usecs;
	int idx;

	if (gve_is_gqi(priv))
		return -EOPNOTSUPP;

	if (ec->tx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO ||
	    ec->rx_coalesce_usecs > GVE_MAX_ITR_INTERVAL_DQO)
		return -EINVAL;
	priv->tx_coalesce_usecs = ec->tx_coalesce_usecs;
	priv->rx_coalesce_usecs = ec->rx_coalesce_usecs;

	if (tx_usecs_orig != priv->tx_coalesce_usecs) {
		for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
			int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx);
			struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->tx_coalesce_usecs);
		}
	}

	if (rx_usecs_orig != priv->rx_coalesce_usecs) {
		for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) {
			int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx);
			struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];

			gve_set_itr_coalesce_usecs_dqo(priv, block,
						       priv->rx_coalesce_usecs);
		}
	}

	return 0;
}

const struct ethtool_ops gve_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
	.get_drvinfo = gve_get_drvinfo,
	.get_strings = gve_get_strings,
	.get_sset_count = gve_get_sset_count,
@@ -547,6 +609,8 @@ const struct ethtool_ops gve_ethtool_ops = {
	.set_channels = gve_set_channels,
	.get_channels = gve_get_channels,
	.get_link = ethtool_op_get_link,
	.get_coalesce = gve_get_coalesce,
	.set_coalesce = gve_set_coalesce,
	.get_ringparam = gve_get_ringparam,
	.reset = gve_user_reset,
	.get_tunable = gve_get_tunable,
Loading