Commit 48ae409a authored by David S. Miller

Merge branch 'gve-desc'



Rushil Gupta says:

====================
gve: Add QPL mode for DQO descriptor format

GVE supports QPL ("queue-page-list") mode, where all data is
communicated through a set of pre-registered pages. This series
adds QPL support to the DQO descriptor format.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 16fd7539 5a3f8d12
+9 −0
@@ -52,6 +52,15 @@ Descriptor Formats
GVE supports two descriptor formats: GQI and DQO. These two formats have
entirely different descriptors, which will be described below.

Addressing Mode
------------------
GVE supports two addressing modes: QPL and RDA.
QPL ("queue-page-list") mode communicates data through a set of
pre-registered pages.

For RDA ("raw DMA addressing") mode, the set of pages is dynamic.
Therefore, the packet buffers can be anywhere in guest memory.

Registers
---------
All registers are MMIO.
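
To make the two modes concrete, here is a minimal sketch of how a TX buffer would be handed to the NIC in each mode. This is illustrative pseudo-driver code, not gve's actual API; the helper names (alloc_qpl_tx_buf, qpl_tx_buf_addr, qpl_tx_buf_dma_addr) are hypothetical:

/* RDA: any DMA-mapped address in guest memory can be given to the NIC. */
dma_addr_t addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
tx_desc->buf_addr = cpu_to_le64(addr);

/* QPL: data is bounced into a buffer carved out of the pre-registered
 * queue page list; the descriptor points into that fixed set of pages.
 */
s16 buf_id = alloc_qpl_tx_buf(tx);                 /* hypothetical helper */
memcpy(qpl_tx_buf_addr(tx, buf_id), skb->data, len);
tx_desc->buf_addr = cpu_to_le64(qpl_tx_buf_dma_addr(tx, buf_id));

Either way the NIC sees a DMA address; the difference is whether that address may point anywhere (RDA) or must fall within pages registered with the device up front (QPL).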
+107 −5
@@ -52,6 +52,26 @@

#define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182

#define DQO_QPL_DEFAULT_TX_PAGES 512
#define DQO_QPL_DEFAULT_RX_PAGES 2048

/* Maximum TSO size supported on DQO */
#define GVE_DQO_TX_MAX	0x3FFFF

#define GVE_TX_BUF_SHIFT_DQO 11

/* 2K buffers for DQO-QPL */
#define GVE_TX_BUF_SIZE_DQO BIT(GVE_TX_BUF_SHIFT_DQO)
#define GVE_TX_BUFS_PER_PAGE_DQO (PAGE_SIZE >> GVE_TX_BUF_SHIFT_DQO)
#define GVE_MAX_TX_BUFS_PER_PKT (DIV_ROUND_UP(GVE_DQO_TX_MAX, GVE_TX_BUF_SIZE_DQO))

/* If the number of free/recyclable buffers is less than this threshold,
 * the driver allocates and uses a non-qpl page on the receive path of
 * DQO QPL to free up buffers.
 * Value is set big enough to post at least 3 64K LRO packets via 2K buffers to the NIC.
 */
#define GVE_DQO_QPL_ONDEMAND_ALLOC_THRESHOLD 96
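
/* Worked example (illustrative, not part of this patch; assumes 4K pages):
 * GVE_TX_BUF_SIZE_DQO      = BIT(11)                    = 2048 bytes,
 * GVE_TX_BUFS_PER_PAGE_DQO = 4096 >> 11                 = 2 buffers per page,
 * GVE_MAX_TX_BUFS_PER_PKT  = DIV_ROUND_UP(0x3FFFF, 2048) = 128 buffers,
 * and the threshold 96 = 3 * (64K / 2K), i.e. three full 64K LRO packets'
 * worth of 2K buffers.
 */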

/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
	struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -218,6 +238,15 @@ struct gve_rx_ring {
			 * which cannot be reused yet.
			 */
			struct gve_index_list used_buf_states;

			/* qpl assigned to this queue */
			struct gve_queue_page_list *qpl;

			/* index into queue page list */
			u32 next_qpl_page_idx;

			/* track number of used buffers */
			u16 used_buf_states_cnt;
		} dqo;
	};

@@ -329,8 +358,14 @@ struct gve_tx_pending_packet_dqo {
	 * All others correspond to `skb`'s frags and should be unmapped with
	 * `dma_unmap_page`.
	 */
	union {
		struct {
			DEFINE_DMA_UNMAP_ADDR(dma[MAX_SKB_FRAGS + 1]);
			DEFINE_DMA_UNMAP_LEN(len[MAX_SKB_FRAGS + 1]);
		};
		s16 tx_qpl_buf_ids[GVE_MAX_TX_BUFS_PER_PKT];
	};
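	/* Illustration (assumption, not part of this patch): on completion,
	 * the RDA path unmaps dma[i]/len[i] for each of the num_bufs entries,
	 * while the DQO-QPL path instead returns tx_qpl_buf_ids[i] to the
	 * free list -- QPL pages stay mapped for the queue's lifetime, so
	 * nothing needs to be unmapped.
	 */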

	u16 num_bufs;

	/* Linked list index to next element in the list, or -1 if none */
@@ -385,6 +420,32 @@ struct gve_tx_ring {
			 * set.
			 */
			u32 last_re_idx;

			/* free running number of packet buf descriptors posted */
			u16 posted_packet_desc_cnt;
			/* free running number of packet buf descriptors completed */
			u16 completed_packet_desc_cnt;

			/* QPL fields */
			struct {
			       /* Linked list of gve_tx_buf_dqo. Index into
				* tx_qpl_buf_next, or -1 if empty.
				*
				* This is a consumer list owned by the TX path. When it
				* runs out, the producer list is stolen from the
				* completion handling path
				* (dqo_compl.free_tx_qpl_buf_head).
				*/
				s16 free_tx_qpl_buf_head;

			       /* Free running count of the number of QPL tx buffers
				* allocated
				*/
				u32 alloc_tx_qpl_buf_cnt;

				/* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */
				u32 free_tx_qpl_buf_cnt;
			};
		} dqo_tx;
	};
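
The consumer/producer pair described in the comments above ties together as follows: buffer ids form singly linked lists threaded through dqo.tx_qpl_buf_next[], and when the TX path's consumer list is empty it takes over whatever the completion path has accumulated with one atomic exchange. A minimal sketch of that allocation step, under the assumption that the patch follows this pattern (the function name is made up):

static s16 gve_tx_qpl_buf_alloc_sketch(struct gve_tx_ring *tx)
{
	s16 index = tx->dqo_tx.free_tx_qpl_buf_head;

	if (unlikely(index == -1)) {
		/* Consumer list is empty: steal the whole producer list that
		 * the completion path has built, in one atomic exchange.
		 */
		index = (s16)atomic_xchg(&tx->dqo_compl.free_tx_qpl_buf_head, -1);
		if (unlikely(index == -1))
			return -1;	/* no recyclable buffers right now */
	}

	/* Pop the head; the next free buffer id is chained through
	 * tx->dqo.tx_qpl_buf_next[].
	 */
	tx->dqo_tx.free_tx_qpl_buf_head = tx->dqo.tx_qpl_buf_next[index];
	return index;
}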

@@ -428,6 +489,24 @@ struct gve_tx_ring {
			 * reached a specified timeout.
			 */
			struct gve_index_list timed_out_completions;

			/* QPL fields */
			struct {
				/* Linked list of gve_tx_buf_dqo. Index into
				 * tx_qpl_buf_next, or -1 if empty.
				 *
				 * This is the producer list, owned by the completion
				 * handling path. When the consumer list
				 * (dqo_tx.free_tx_qpl_buf_head) runs out, this list
				 * will be stolen.
				 */
				atomic_t free_tx_qpl_buf_head;

				/* Free running count of the number of tx buffers
				 * freed
				 */
				atomic_t free_tx_qpl_buf_cnt;
			};
		} dqo_compl;
	} ____cacheline_aligned;
	u64 pkt_done; /* free-running - total packets completed */
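
On the completion side, freed buffer ids are pushed back onto the producer list without a lock. A plausible sketch of that push (again an assumption about the shape of the code, not the patch's exact implementation) chains the freed id onto the current head with a compare-and-exchange loop:

static void gve_tx_qpl_buf_free_sketch(struct gve_tx_ring *tx, s16 index)
{
	s16 old_head;

	do {
		old_head = (s16)atomic_read(&tx->dqo_compl.free_tx_qpl_buf_head);
		tx->dqo.tx_qpl_buf_next[index] = old_head;
	} while (old_head != (s16)atomic_cmpxchg(&tx->dqo_compl.free_tx_qpl_buf_head,
						 old_head, index));

	atomic_inc(&tx->dqo_compl.free_tx_qpl_buf_cnt);
}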
@@ -454,6 +533,21 @@ struct gve_tx_ring {
			s16 num_pending_packets;

			u32 complq_mask; /* complq size is complq_mask + 1 */

			/* QPL fields */
			struct {
				/* qpl assigned to this queue */
				struct gve_queue_page_list *qpl;

				/* Each QPL page is divided into TX bounce buffers
				 * of size GVE_TX_BUF_SIZE_DQO. tx_qpl_buf_next is
				 * an array to manage linked lists of TX buffers.
				 * An entry j at index i implies that the j'th
				 * buffer is next on the list after the i'th.
				 */
				s16 *tx_qpl_buf_next;
				u32 num_tx_qpl_bufs;
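
				/* Illustration (assumption, not part of this
				 * patch): buffer id i is backed by QPL page
				 * i / GVE_TX_BUFS_PER_PAGE_DQO at byte offset
				 * (i % GVE_TX_BUFS_PER_PAGE_DQO) * GVE_TX_BUF_SIZE_DQO,
				 * so with 4K pages each page holds two 2K
				 * bounce buffers.
				 */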
			};
		} dqo;
	} ____cacheline_aligned;
	struct netdev_queue *netdev_txq;
@@ -532,6 +626,7 @@ enum gve_queue_format {
	GVE_GQI_RDA_FORMAT		= 0x1,
	GVE_GQI_QPL_FORMAT		= 0x2,
	GVE_DQO_RDA_FORMAT		= 0x3,
	GVE_DQO_QPL_FORMAT		= 0x4,
};

struct gve_priv {
@@ -551,7 +646,8 @@ struct gve_priv {
	u16 num_event_counters;
	u16 tx_desc_cnt; /* num desc per ring */
	u16 rx_desc_cnt; /* num desc per ring */
	u16 tx_pages_per_qpl; /* tx buffer length */
	u16 tx_pages_per_qpl; /* Suggested number of pages per qpl for TX queues by NIC */
	u16 rx_pages_per_qpl; /* Suggested number of pages per qpl for RX queues by NIC */
	u16 rx_data_slot_cnt; /* rx buffer length */
	u64 max_registered_pages;
	u64 num_registered_pages; /* num pages registered with NIC */
@@ -809,11 +905,17 @@ static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx)
	return (priv->num_ntfy_blks / 2) + queue_idx;
}

static inline bool gve_is_qpl(struct gve_priv *priv)
{
	return priv->queue_format == GVE_GQI_QPL_FORMAT ||
		priv->queue_format == GVE_DQO_QPL_FORMAT;
}

/* Returns the number of tx queue page lists
 */
static inline u32 gve_num_tx_qpls(struct gve_priv *priv)
{
	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	return priv->tx_cfg.num_queues + priv->num_xdp_queues;
@@ -833,7 +935,7 @@ static inline u32 gve_num_xdp_qpls(struct gve_priv *priv)
 */
static inline u32 gve_num_rx_qpls(struct gve_priv *priv)
{
	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	return priv->rx_cfg.num_queues;
+78 −11
@@ -39,7 +39,8 @@ void gve_parse_device_option(struct gve_priv *priv,
			     struct gve_device_option_gqi_rda **dev_op_gqi_rda,
			     struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
			     struct gve_device_option_dqo_rda **dev_op_dqo_rda,
			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
			     struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
			     struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
	u32 req_feat_mask = be32_to_cpu(option->required_features_mask);
	u16 option_length = be16_to_cpu(option->option_length);
@@ -112,6 +113,22 @@ void gve_parse_device_option(struct gve_priv *priv,
		}
		*dev_op_dqo_rda = (void *)(option + 1);
		break;
	case GVE_DEV_OPT_ID_DQO_QPL:
		if (option_length < sizeof(**dev_op_dqo_qpl) ||
		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL) {
			dev_warn(&priv->pdev->dev, GVE_DEVICE_OPTION_ERROR_FMT,
				 "DQO QPL", (int)sizeof(**dev_op_dqo_qpl),
				 GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL,
				 option_length, req_feat_mask);
			break;
		}

		if (option_length > sizeof(**dev_op_dqo_qpl)) {
			dev_warn(&priv->pdev->dev,
				 GVE_DEVICE_OPTION_TOO_BIG_FMT, "DQO QPL");
		}
		*dev_op_dqo_qpl = (void *)(option + 1);
		break;
	case GVE_DEV_OPT_ID_JUMBO_FRAMES:
		if (option_length < sizeof(**dev_op_jumbo_frames) ||
		    req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) {
@@ -146,7 +163,8 @@ gve_process_device_options(struct gve_priv *priv,
			   struct gve_device_option_gqi_rda **dev_op_gqi_rda,
			   struct gve_device_option_gqi_qpl **dev_op_gqi_qpl,
			   struct gve_device_option_dqo_rda **dev_op_dqo_rda,
			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames)
			   struct gve_device_option_jumbo_frames **dev_op_jumbo_frames,
			   struct gve_device_option_dqo_qpl **dev_op_dqo_qpl)
{
	const int num_options = be16_to_cpu(descriptor->num_device_options);
	struct gve_device_option *dev_opt;
@@ -166,7 +184,8 @@ gve_process_device_options(struct gve_priv *priv,

		gve_parse_device_option(priv, descriptor, dev_opt,
					dev_op_gqi_rda, dev_op_gqi_qpl,
					dev_op_dqo_rda, dev_op_jumbo_frames);
					dev_op_dqo_rda, dev_op_jumbo_frames,
					dev_op_dqo_qpl);
		dev_opt = next_opt;
	}

@@ -505,12 +524,24 @@ static int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index)

		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
	} else {
		u16 comp_ring_size;
		u32 qpl_id = 0;

		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
			comp_ring_size =
				priv->options_dqo_rda.tx_comp_ring_entries;
		} else {
			qpl_id = tx->dqo.qpl->id;
			comp_ring_size = priv->tx_desc_cnt;
		}
		cmd.create_tx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_tx_queue.tx_ring_size =
			cpu_to_be16(priv->tx_desc_cnt);
		cmd.create_tx_queue.tx_comp_ring_addr =
			cpu_to_be64(tx->complq_bus_dqo);
		cmd.create_tx_queue.tx_comp_ring_size =
			cpu_to_be16(priv->options_dqo_rda.tx_comp_ring_entries);
			cpu_to_be16(comp_ring_size);
	}

	return gve_adminq_issue_cmd(priv, &cmd);
@@ -555,6 +586,18 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_rx_queue.packet_buffer_size = cpu_to_be16(rx->packet_buffer_size);
	} else {
		u16 rx_buff_ring_entries;
		u32 qpl_id = 0;

		if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
			qpl_id = GVE_RAW_ADDRESSING_QPL_ID;
			rx_buff_ring_entries =
				priv->options_dqo_rda.rx_buff_ring_entries;
		} else {
			qpl_id = rx->dqo.qpl->id;
			rx_buff_ring_entries = priv->rx_desc_cnt;
		}
		cmd.create_rx_queue.queue_page_list_id = cpu_to_be32(qpl_id);
		cmd.create_rx_queue.rx_ring_size =
			cpu_to_be16(priv->rx_desc_cnt);
		cmd.create_rx_queue.rx_desc_ring_addr =
@@ -564,7 +607,7 @@ static int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index)
		cmd.create_rx_queue.packet_buffer_size =
			cpu_to_be16(priv->data_buffer_size_dqo);
		cmd.create_rx_queue.rx_buff_ring_size =
			cpu_to_be16(priv->options_dqo_rda.rx_buff_ring_entries);
			cpu_to_be16(rx_buff_ring_entries);
		cmd.create_rx_queue.enable_rsc =
			!!(priv->dev->features & NETIF_F_LRO);
	}
@@ -675,9 +718,13 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
		     const struct gve_device_option_dqo_rda *dev_op_dqo_rda)
{
	priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries);
	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);

	if (priv->queue_format == GVE_DQO_QPL_FORMAT)
		return 0;

	priv->options_dqo_rda.tx_comp_ring_entries =
		be16_to_cpu(dev_op_dqo_rda->tx_comp_ring_entries);
	priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries);
	priv->options_dqo_rda.rx_buff_ring_entries =
		be16_to_cpu(dev_op_dqo_rda->rx_buff_ring_entries);

@@ -687,7 +734,9 @@ gve_set_desc_cnt_dqo(struct gve_priv *priv,
static void gve_enable_supported_features(struct gve_priv *priv,
					  u32 supported_features_mask,
					  const struct gve_device_option_jumbo_frames
						  *dev_op_jumbo_frames)
					  *dev_op_jumbo_frames,
					  const struct gve_device_option_dqo_qpl
					  *dev_op_dqo_qpl)
{
	/* Before control reaches this point, the page-size-capped max MTU from
	 * the gve_device_descriptor field has already been stored in
@@ -699,6 +748,18 @@ static void gve_enable_supported_features(struct gve_priv *priv,
			 "JUMBO FRAMES device option enabled.\n");
		priv->dev->max_mtu = be16_to_cpu(dev_op_jumbo_frames->max_mtu);
	}

	/* Override the default pages-per-qpl values for DQO-QPL */
	if (dev_op_dqo_qpl) {
		priv->tx_pages_per_qpl =
			be16_to_cpu(dev_op_dqo_qpl->tx_pages_per_qpl);
		priv->rx_pages_per_qpl =
			be16_to_cpu(dev_op_dqo_qpl->rx_pages_per_qpl);
		if (priv->tx_pages_per_qpl == 0)
			priv->tx_pages_per_qpl = DQO_QPL_DEFAULT_TX_PAGES;
		if (priv->rx_pages_per_qpl == 0)
			priv->rx_pages_per_qpl = DQO_QPL_DEFAULT_RX_PAGES;
	}
}
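
For scale, and assuming 4 KiB pages (an illustration, not something the patch states): the TX default of 512 pages per QPL is a 2 MiB registration per TX queue, or 512 * GVE_TX_BUFS_PER_PAGE_DQO = 1024 2K bounce buffers; the RX default of 2048 pages is an 8 MiB registration per RX queue. The device may suggest different values through the DQO QPL device option, and the zero checks above fall back to these defaults.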

int gve_adminq_describe_device(struct gve_priv *priv)
@@ -707,6 +768,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
	struct gve_device_option_gqi_rda *dev_op_gqi_rda = NULL;
	struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL;
	struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL;
	struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL;
	struct gve_device_descriptor *descriptor;
	u32 supported_features_mask = 0;
	union gve_adminq_command cmd;
@@ -733,13 +795,14 @@ int gve_adminq_describe_device(struct gve_priv *priv)

	err = gve_process_device_options(priv, descriptor, &dev_op_gqi_rda,
					 &dev_op_gqi_qpl, &dev_op_dqo_rda,
					 &dev_op_jumbo_frames);
					 &dev_op_jumbo_frames,
					 &dev_op_dqo_qpl);
	if (err)
		goto free_device_descriptor;

	/* If the GQI_RAW_ADDRESSING option is not enabled and the queue format
	 * is not set to GqiRda, choose the queue format in a priority order:
	 * DqoRda, GqiRda, GqiQpl. Use GqiQpl as default.
	 * DqoRda, DqoQpl, GqiRda, GqiQpl. Use GqiQpl as default.
	 */
	if (dev_op_dqo_rda) {
		priv->queue_format = GVE_DQO_RDA_FORMAT;
@@ -747,6 +810,10 @@ int gve_adminq_describe_device(struct gve_priv *priv)
			 "Driver is running with DQO RDA queue format.\n");
		supported_features_mask =
			be32_to_cpu(dev_op_dqo_rda->supported_features_mask);
	} else if (dev_op_dqo_qpl) {
		priv->queue_format = GVE_DQO_QPL_FORMAT;
		supported_features_mask =
			be32_to_cpu(dev_op_dqo_qpl->supported_features_mask);
	} else if (dev_op_gqi_rda) {
		priv->queue_format = GVE_GQI_RDA_FORMAT;
		dev_info(&priv->pdev->dev,
@@ -798,7 +865,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
	priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues);

	gve_enable_supported_features(priv, supported_features_mask,
				      dev_op_jumbo_frames);
				      dev_op_jumbo_frames, dev_op_dqo_qpl);

free_device_descriptor:
	dma_free_coherent(&priv->pdev->dev, PAGE_SIZE, descriptor,
+10 −0
@@ -109,6 +109,14 @@ struct gve_device_option_dqo_rda {

static_assert(sizeof(struct gve_device_option_dqo_rda) == 8);

struct gve_device_option_dqo_qpl {
	__be32 supported_features_mask;
	__be16 tx_pages_per_qpl;
	__be16 rx_pages_per_qpl;
};

static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8);

struct gve_device_option_jumbo_frames {
	__be32 supported_features_mask;
	__be16 max_mtu;
@@ -130,6 +138,7 @@ enum gve_dev_opt_id {
	GVE_DEV_OPT_ID_GQI_RDA = 0x2,
	GVE_DEV_OPT_ID_GQI_QPL = 0x3,
	GVE_DEV_OPT_ID_DQO_RDA = 0x4,
	GVE_DEV_OPT_ID_DQO_QPL = 0x7,
	GVE_DEV_OPT_ID_JUMBO_FRAMES = 0x8,
};

@@ -139,6 +148,7 @@ enum gve_dev_opt_req_feat_mask {
	GVE_DEV_OPT_REQ_FEAT_MASK_GQI_QPL = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_RDA = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES = 0x0,
	GVE_DEV_OPT_REQ_FEAT_MASK_DQO_QPL = 0x0,
};

enum gve_sup_feature_mask {
+14 −6
@@ -31,7 +31,6 @@

// Minimum amount of time between queue kicks in msec (10 seconds)
#define MIN_TX_TIMEOUT_GAP (1000 * 10)
#define DQO_TX_MAX	0x3FFFF

char gve_driver_name[] = "gve";
const char gve_version_str[] = GVE_VERSION;
@@ -494,7 +493,7 @@ static int gve_setup_device_resources(struct gve_priv *priv)
		goto abort_with_stats_report;
	}

	if (priv->queue_format == GVE_DQO_RDA_FORMAT) {
	if (!gve_is_gqi(priv)) {
		priv->ptype_lut_dqo = kvzalloc(sizeof(*priv->ptype_lut_dqo),
					       GFP_KERNEL);
		if (!priv->ptype_lut_dqo) {
@@ -1083,11 +1082,12 @@ static int gve_alloc_xdp_qpls(struct gve_priv *priv)
static int gve_alloc_qpls(struct gve_priv *priv)
{
	int max_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
	int page_count;
	int start_id;
	int i, j;
	int err;

	if (priv->queue_format != GVE_GQI_QPL_FORMAT)
	if (!gve_is_qpl(priv))
		return 0;

	priv->qpls = kvcalloc(max_queues, sizeof(*priv->qpls), GFP_KERNEL);
@@ -1095,17 +1095,25 @@ static int gve_alloc_qpls(struct gve_priv *priv)
		return -ENOMEM;

	start_id = gve_tx_start_qpl_id(priv);
	page_count = priv->tx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_tx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->tx_pages_per_qpl);
						page_count);
		if (err)
			goto free_qpls;
	}

	start_id = gve_rx_start_qpl_id(priv);

	/* For GQI_QPL, the number of pages allocated has a 1:1 relationship
	 * with the number of descriptors. For DQO, more pages are required
	 * than descriptors (because of out-of-order completions).
	 */
	page_count = priv->queue_format == GVE_GQI_QPL_FORMAT ?
		priv->rx_data_slot_cnt : priv->rx_pages_per_qpl;
	for (i = start_id; i < start_id + gve_num_rx_qpls(priv); i++) {
		err = gve_alloc_queue_page_list(priv, i,
						priv->rx_data_slot_cnt);
						page_count);
		if (err)
			goto free_qpls;
	}
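
In other words, a GQI_QPL RX queue registers one page per descriptor slot (rx_data_slot_cnt), while a DQO QPL RX queue registers rx_pages_per_qpl pages (2048 by default), deliberately more than the ring has descriptors so that buffers tied up by out-of-order completions do not starve the queue of postable buffers.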
@@ -2051,7 +2059,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)

	/* Big TCP is only supported on DQO */
	if (!gve_is_gqi(priv))
		netif_set_tso_max_size(priv->dev, DQO_TX_MAX);
		netif_set_tso_max_size(priv->dev, GVE_DQO_TX_MAX);

	priv->num_registered_pages = 0;
	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;