Commit 48ae409a authored by David S. Miller

Merge branch 'gve-desc'



Rushil Gupta says:

====================
gve: Add QPL mode for DQO descriptor format

GVE supports QPL ("queue-page-list") mode, where all data is
communicated through a set of pre-registered pages. This series
adds QPL support to the DQO descriptor format.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 16fd7539 5a3f8d12
+9 −0
@@ -52,6 +52,15 @@ Descriptor Formats
GVE supports two descriptor formats: GQI and DQO. These two formats have
entirely different descriptors, which will be described below.

Addressing Mode
------------------
GVE supports two addressing modes: QPL and RDA.
QPL ("queue-page-list") mode communicates data through a set of
pre-registered pages.

For RDA ("raw DMA addressing") mode, the set of pages is dynamic.
Therefore, the packet buffers can be anywhere in guest memory.

Registers
---------
All registers are MMIO.
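
To make the two modes concrete, here is a minimal sketch of how a TX buffer would be handed to the NIC in each mode. This is illustrative pseudo-driver code, not gve's actual API; the helper names (alloc_qpl_tx_buf, qpl_tx_buf_addr, qpl_tx_buf_dma_addr) are hypothetical:

/* RDA: any DMA-mapped address in guest memory can be given to the NIC. */
dma_addr_t addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
tx_desc->buf_addr = cpu_to_le64(addr);

/* QPL: data is bounced into a buffer carved out of the pre-registered
 * queue page list; the descriptor points into that fixed set of pages.
 */
s16 buf_id = alloc_qpl_tx_buf(tx);                 /* hypothetical helper */
memcpy(qpl_tx_buf_addr(tx, buf_id), skb->data, len);
tx_desc->buf_addr = cpu_to_le64(qpl_tx_buf_dma_addr(tx, buf_id));

Either way the NIC sees a DMA address; the difference is whether that address may point anywhere (RDA) or must fall within pages registered with the device up front (QPL).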
+107 −5
@@ -52,6 +52,26 @@

#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182

#define DQO_QPL_DEFAULT_TX_PAGES 512
#define DQO_QPL_DEFAULT_RX_PAGES 2048

/* Maximum TSO size supported on DQO */
#define GVE_DQO_TX_MAX	0x3FFFF

#define GVE_TX_BUF_SHIFT_DQO 11

/* 2K buffers for DQO-QPL */
#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))

/* If the number of free/recyclable buffers is less than this threshold,
 * the driver allocates and uses a non-qpl page on the receive path of
 * DQO QPL to free up buffers.
 * Value is set big enough to post at least 3 64K LRO packets via 2K buffers to the NIC.
 */
#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96
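
/* Worked example (illustrative, not part of this patch; assumes 4K pages):
 * GVE_TX_BUF_SIZE_DQO      = BIT(11)                    = 2048 bytes,
 * GVE_TX_BUFS_PER_PAGE_DQO = 4096 >> 11                 = 2 buffers per page,
 * GVE_MAX_TX_BUFS_PER_PKT  = DIV_ROUND_UP(0x3FFFF, 2048) = 128 buffers,
 * and the threshold 96 = 3 * (64K / 2K), i.e. three full 64K LRO packets'
 * worth of 2K buffers.
 */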

/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -218,6 +238,15 @@ struct gve_rx_ring {
			 * which cannot be reused yet.
			 */
			struct gve_index_list used_buf_states;

			/* qpl assigned to this queue */
			struct gve_queue_page_list *qpl;

			/* index into queue page list */
			u32 next_qpl_page_idx;

			/* track number of used buffers */
			u16 used_buf_states_cnt;
		} dqo;
	};

@@ -329,8 +358,14 @@ struct gve_tx_pending_packet_dqo {
	 * All others correspond to `skb`'s frags and should be unmapped with
	 * `dma_unmap_page`.
	 */
	union {
		struct {
			DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
			DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
		};
		s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT];
	};
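	/* Illustration (assumption, not part of this patch): on completion,
	 * the RDA path unmaps dma[i]/len[i] for each of the num_bufs entries,
	 * while the DQO-QPL path instead returns tx_qpl_buf_ids[i] to the
	 * free list -- QPL pages stay mapped for the queue's lifetime, so
	 * nothing needs to be unmapped.
	 */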

	u16 num_bufs;

	/* Linked list index to next element in the list, or -1 if none */
@@ -385,6 +420,32 @@ struct gve_tx_ring {
			 * set.
			 */
			u32 last_re_idx;

			/* free running number of packet buf descriptors posted */
			u16 posted_packet_desc_cnt;
			/* free running number of packet buf descriptors completed */
			u16 completed_packet_desc_cnt;

			/* QPL fields */
			struct {
			       /* Linked list of gve_tx_buf_dqo. Index into
				* tx_qpl_buf_next, or -1 if empty.
				*
				* This is a consumer list owned by the TX path. When it
				* runs out, the producer list is stolen from the
				* completion handling path
				* (dqo_compl.free_tx_qpl_buf_head).
				*/
				s16 free_tx_qpl_buf_head;

			       /* Free running count of the number of QPL tx buffers
				* allocated
				*/
				u32 alloc_tx_qpl_buf_cnt;

				/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
				u32 free_tx_qpl_buf_cnt;
			};
		} dqo_tx;
	};
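
The consumer/producer pair described in the comments above ties together as follows: buffer ids form singly linked lists threaded through dqo.tx_qpl_buf_next[], and when the TX path's consumer list is empty it takes over whatever the completion path has accumulated with one atomic exchange. A minimal sketch of that allocation step, under the assumption that the patch follows this pattern (the function name is made up):

static s16 gve_tx_qpl_buf_alloc_sketch(struct gve_tx_ring *tx)
{
	s16 index = tx->dqo_tx.free_tx_qpl_buf_head;

	if (unlikely(index == -1)) {
		/* Consumer list is empty: steal the whole producer list that
		 * the completion path has built, in one atomic exchange.
		 */
		index = (s16)atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
		if (unlikely(index == -1))
			return -1;	/* no recyclable buffers right now */
	}

	/* Pop the head; the next free buffer id is chained through
	 * tx->dqo.tx_qpl_buf_next[].
	 */
	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];
	return index;
}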

@@ -428,6 +489,24 @@ struct gve_tx_ring {
			 * reached a specified timeout.
			 */
			struct gve_index_list timed_out_completions;

			/* QPL fields */
			struct {
				/* Linked list of gve_tx_buf_dqo. Index into
				 * tx_qpl_buf_next, or -1 if empty.
				 *
				 * This is the producer list, owned by the completion
				 * handling path. When the consumer list
				 * (dqo_tx.free_tx_qpl_buf_head) runs out, this list
				 * will be stolen.
				 */
				atomic_t free_tx_qpl_buf_head;

				/* Free running count of the number of tx buffers
				 * freed
				 */
				atomic_t free_tx_qpl_buf_cnt;
			};
		} dqo_compl;
	} ____cacheline_aligned;
	u64 pkt_done; /* free-running - total packets completed */
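
On the completion side, freed buffer ids are pushed back onto the producer list without a lock. A plausible sketch of that push (again an assumption about the shape of the code, not the patch's exact implementation) chains the freed id onto the current head with a compare-and-exchange loop:

static void gve_tx_qpl_buf_free_sketch(struct gve_tx_ring *tx, s16 index)
{
	s16 old_head;

	do {
		old_head = (s16)atomic_read(&tx->dqo_compl.free_tx_qpl_buf_head);
		tx->dqo.tx_qpl_buf_next[index] = old_head;
	} while (old_head != (s16)atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
						 old_head, index));

	atomic_inc(&tx->dqo_compl.free_tx_qpl_buf_cnt);
}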
@@ -454,6 +533,21 @@ struct gve_tx_ring {
			s16 num_pending_packets;

			u32 complq_mask; /* complq size is complq_mask + 1 */

			/* QPL fields */
			struct {
				/* qpl assigned to this queue */
				struct gve_queue_page_list *qpl;

				/* Each QPL page is divided into TX bounce buffers
				 * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is
				 * an array to manage linked lists of TX buffers.
				 * An entry j at index i implies that the j'th
				 * buffer is next on the list after the i'th.
				 */
				s16 *tx_qpl_buf_next;
				u32 num_tx_qpl_bufs;
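
				/* Illustration (assumption, not part of this
				 * patch): buffer id i is backed by QPL page
				 * i / GVE_TX_BUFS_PER_PAGE_DQO at byte offset
				 * (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO,
				 * so with 4K pages each page holds two 2K
				 * bounce buffers.
				 */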
			};
		} dqo;
	} ____cacheline_aligned;
	struct netdev_queue *netdev_txq;
@@ -532,6 +626,7 @@ enum gve_queue_format {
	GVE_GQI_RDA_FORMAT		= 0x1,
	GVE_GQI_QPL_FORMAT		= 0x2,
	GVE_DQO_RDA_FORMAT		= 0x3,
	GVE_DQO_QPL_FORMAT		= 0x4,
};

struct gve_priv {
@@ -551,7 +646,8 @@ struct gve_priv {
	u16 num_event_counters;
	u16 tx_desc_cnt; /* num desc per ring */
	u16 rx_desc_cnt; /* num desc per ring */
	u16 tx_pages_per_qpl; /* tx buffer length */
	u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
	u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
	u16 rx_data_slot_cnt; /* rx buffer length */
	u64 max_registered_pages;
	u64 num_registered_pages; /* num pages registered with NIC */
@@ -809,11 +905,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
	return (priv->num_ntfy_blks / 2) + queue_idx;
}

static inline bool gve_is_qpl(struct gve_priv *priv)
{
	return priv->queue_format == GVE_GQI_QPL_FORMAT ||
		priv->queue_format == GVE_DQO_QPL_FORMAT;
}

/* Returns the number of tx queue page lists
 */
static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
{
	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
@@ -833,7 +935,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
 */
static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
{
	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	return priv->rx_cfg.num_queues;
+78 −11
@@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
			     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
			     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
	u16 option_length = be16_to_cpu(option->option_length);
@@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
		}
		*dev_op_dqo_rda = (void *)(option + 1);
		break;
	case GVE_DEV_OPT_ID_DQO_QPL:
		if (option_length < sizeof(**dev_op_dqo_qpl) ||
		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
				 "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
				 option_length, req_feat_mask);
			break;
		}

		if (option_length > sizeof(**dev_op_dqo_qpl)) {
			dev_warn(&priv->pdev->dev,
				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
		}
		*dev_op_dqo_qpl = (void *)(option + 1);
		break;
	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
		if (option_length < sizeof(**dev_op_jumbo_frames) ||
		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
			   struct gve_device_option_dqo_rda **dev_op_dqo_rda,
			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
			   struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
	const int num_options = be16_to_cpu(descriptor->num_device_options);
	struct gve_device_option *dev_opt;
@@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,

		gve_parse_device_option(priv, descriptor, dev_opt,
					dev_op_gqi_rda, dev_op_gqi_qpl,
					dev_op_dqo_rda, dev_op_jumbo_frames);
					dev_op_dqo_rda, dev_op_jumbo_frames,
					dev_op_dqo_qpl);
		dev_opt = next_opt;
	}

@@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)

		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
	} else {
		u16 comp_ring_size;
		u32 qpl_id = 0;

		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
			comp_ring_size =
				priv->options_dqo_rda.tx_comp_ring_entries;
		} else {
			qpl_id = tx->dqo.qpl->id;
			comp_ring_size = priv->tx_desc_cnt;
		}
		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_tx_queue.tx_ring_size =
			cpu_to_be16(priv->tx_desc_cnt);
		cmd.create_tx_queue.tx_comp_ring_addr =
			cpu_to_be64(tx->complq_bus_dqo);
		cmd.create_tx_queue.tx_comp_ring_size =
			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
			cpu_to_be16(comp_ring_size);
	}

	return gve_adminq_issue_cmd(priv, &cmd);
@@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
	} else {
		u16 rx_buff_ring_entries;
		u32 qpl_id = 0;

		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
			rx_buff_ring_entries =
				priv->options_dqo_rda.rx_buff_ring_entries;
		} else {
			qpl_id = rx->dqo.qpl->id;
			rx_buff_ring_entries = priv->rx_desc_cnt;
		}
		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_rx_queue.rx_ring_size =
			cpu_to_be16(priv->rx_desc_cnt);
		cmd.create_rx_queue.rx_desc_ring_addr =
@@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
		cmd.create_rx_queue.packet_buffer_size =
			cpu_to_be16(priv->data_buffer_size_dqo);
		cmd.create_rx_queue.rx_buff_ring_size =
			cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
			cpu_to_be16(rx_buff_ring_entries);
		cmd.create_rx_queue.enable_rsc =
			!!(priv->dev->features & NETIF_F_LRO);
	}
@@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
		     const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
{
	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);

	if (priv->queue_format == GVE_DQO_QPL_FORMAT)
		return 0;

	priv->options_dqo_rda.tx_comp_ring_entries =
		be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
	priv->options_dqo_rda.rx_buff_ring_entries =
		be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);

@@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
static void gve_enable_supported_features(struct gve_priv *priv,
					  u32 supported_features_mask,
					  const struct gve_device_option_jumbo_frames
						  *dev_op_jumbo_frames)
					  *dev_op_jumbo_frames,
					  const struct gve_device_option_dqo_qpl
					  *dev_op_dqo_qpl)
{
	/* Before control reaches this point, the page-size-capped max MTU from
	 * the gve_device_descriptor field has already been stored in
@@ -699,6 +748,18 @@ static void gve_enable_supported_features(struct gve_priv *priv,
			 "JUMBO FRAMES device option enabled.\n");
		priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
	}

	/* Override the default pages-per-qpl values for DQO-QPL */
	if (dev_op_dqo_qpl) {
		priv->tx_pages_per_qpl =
			be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
		priv->rx_pages_per_qpl =
			be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
		if (priv->tx_pages_per_qpl == 0)
			priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
		if (priv->rx_pages_per_qpl == 0)
			priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
	}
}
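
For scale, and assuming 4 KiB pages (an illustration, not something the patch states): the TX default of 512 pages per QPL is a 2 MiB registration per TX queue, or 512 * GVE_TX_BUFS_PER_PAGE_DQO = 1024 2K bounce buffers; the RX default of 2048 pages is an 8 MiB registration per RX queue. The device may suggest different values through the DQO QPL device option, and the zero checks above fall back to these defaults.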

int gve_adminq_describe_device(struct gve_priv *priv)
@@ -707,6 +768,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
	struct gve_device_descriptor *descriptor;
	u32 supported_features_mask = 0;
	union gve_adminq_command cmd;
@@ -733,13 +795,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)

	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
					 &dev_op_gqi_qpl, &dev_op_dqo_rda,
					 &dev_op_jumbo_frames);
					 &dev_op_jumbo_frames,
					 &dev_op_dqo_qpl);
	if (err)
		goto free_device_descriptor;

	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
	 * is not set to GqiRda, choose the queue format in a priority order:
	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
	 * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
	 */
	if (dev_op_dqo_rda) {
		priv->queue_format = GVE_DQO_RDA_FORMAT;
@@ -747,6 +810,10 @@ int gve_adminq_describe_device(struct gve_priv *priv)
			 "Driver is running with DQO RDA queue format.\n");
		supported_features_mask =
			be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
	} else if (dev_op_dqo_qpl) {
		priv->queue_format = GVE_DQO_QPL_FORMAT;
		supported_features_mask =
			be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
	} else if (dev_op_gqi_rda) {
		priv->queue_format = GVE_GQI_RDA_FORMAT;
		dev_info(&priv->pdev->dev,
@@ -798,7 +865,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);

	gve_enable_supported_features(priv, supported_features_mask,
				      dev_op_jumbo_frames);
				      dev_op_jumbo_frames, dev_op_dqo_qpl);

free_device_descriptor:
	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
+10 −0
@@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {

static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);

struct gve_device_option_dqo_qpl {
	__be32 supported_features_mask;
	__be16 tx_pages_per_qpl;
	__be16 rx_pages_per_qpl;
};

static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);

struct gve_device_option_jumbo_frames {
	__be32 supported_features_mask;
	__be16 max_mtu;
@@ -130,6 +138,7 @@ enum gve_dev_opt_id {
	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
	GVE_DEV_OPT_ID_DQO_QPL = 0x7,
	GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};

@@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
};

enum gve_sup_feature_mask {
+14 −6
@@ -31,7 +31,6 @@

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)
#define DQO_TX_MAX	0x3FFFF

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
@@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
		goto abort_with_stats_report;
	}

	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
@@ -1083,11 +1082,12 @@ static int gve_alloc_xdp_qpls(struct gve_priv *priv)
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;

	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
@@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	page_count = priv->tx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
						page_count);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);

	/* For GQI_QPL, the number of pages allocated has a 1:1 relationship
	 * with the number of descriptors. For DQO, more pages are required
	 * than descriptors (because of out-of-order completions).
	 */
	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
						page_count);
		if (err)
			goto free_qpls;
	}
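
In other words, a GQI_QPL RX queue registers one page per descriptor slot (rx_data_slot_cnt), while a DQO QPL RX queue registers rx_pages_per_qpl pages (2048 by default), deliberately more than the ring has descriptors so that buffers tied up by out-of-order completions do not starve the queue of postable buffers.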
@@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;