Commit aa866ee4 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge tag 'mlx5-fixes-2023-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5 fixes 2023-05-24

This series includes bug fixes for the mlx5 driver.

* tag 'mlx5-fixes-2023-05-24' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  Documentation: net/mlx5: Wrap notes in admonition blocks
  Documentation: net/mlx5: Add blank line separator before numbered lists
  Documentation: net/mlx5: Use bullet and definition lists for vnic counters description
  Documentation: net/mlx5: Wrap vnic reporter devlink commands in code blocks
  net/mlx5: Fix check for allocation failure in comp_irqs_request_pci()
  net/mlx5: DR, Add missing mutex init/destroy in pattern manager
  net/mlx5e: Move Ethernet driver debugfs to profile init callback
  net/mlx5e: Don't attach netdev profile while handling internal error
  net/mlx5: Fix post parse infra to only parse every action once
  net/mlx5e: Use query_special_contexts cmd only once per mdev
  net/mlx5: fw_tracer, Fix event handling
  net/mlx5: SF, Drain health before removing device
  net/mlx5: Drain health before unregistering devlink
  net/mlx5e: Do not update SBCM when prio2buffer command is invalid
  net/mlx5e: Consider internal buffers size in port buffer calculations
  net/mlx5e: Prevent encap offload when neigh update is running
  net/mlx5e: Extract remaining tunnel encap code to dedicated file
====================

Link: https://lore.kernel.org/r/20230525034847.99268-1-saeed@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 822b5a1c bb72b94c
Loading
Loading
Loading
Loading
+37 −23
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ flow_steering_mode: Device flow steering mode
---------------------------------------------
The flow steering mode parameter controls the flow steering mode of the driver.
Two modes are supported:

1. 'dmfs' - Device managed flow steering.
2. 'smfs' - Software/Driver managed flow steering.

@@ -99,6 +100,7 @@ between representors and stacked devices.
By default metadata is enabled on the supported devices in E-switch.
Metadata is applicable only for E-switch in switchdev mode and
users may disable it when NONE of the below use cases will be in use:

1. HCA is in Dual/multi-port RoCE mode.
2. VF/SF representor bonding (Usually used for Live migration)
3. Stacked devices
@@ -180,7 +182,8 @@ User commands examples:

    $ devlink health diagnose pci/0000:82:00.0 reporter tx

NOTE: This command has valid output only when interface is up, otherwise the command has empty output.
.. note::
   This command has valid output only when interface is up, otherwise the command has empty output.

- Show number of tx errors indicated, number of recover flows ended successfully,
  is autorecover enabled and graceful period from last recover::
@@ -232,7 +235,8 @@ User commands examples:

    $ devlink health dump show pci/0000:82:00.0 reporter fw

NOTE: This command can run only on the PF which has fw tracer ownership,
.. note::
   This command can run only on the PF which has fw tracer ownership,
   running it on other PF or any VF will return "Operation not permitted".

fw fatal reporter
@@ -256,7 +260,8 @@ User commands examples:

    $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal

NOTE: This command can run only on PF.
.. note::
   This command can run only on PF.

vnic reporter
-------------
@@ -265,28 +270,37 @@ It is responsible for querying the vnic diagnostic counters from fw and displayi
them in realtime.

Description of the vnic counters:
total_q_under_processor_handle: number of queues in an error state due to

- total_q_under_processor_handle
        number of queues in an error state due to
        an async error or errored command.
send_queue_priority_update_flow: number of QP/SQ priority/SL update
events.
cq_overrun: number of times CQ entered an error state due to an
overflow.
async_eq_overrun: number of times an EQ mapped to async events was
overrun.
comp_eq_overrun: number of times an EQ mapped to completion events was
- send_queue_priority_update_flow
        number of QP/SQ priority/SL update events.
- cq_overrun
        number of times CQ entered an error state due to an overflow.
- async_eq_overrun
        number of times an EQ mapped to async events was overrun.
- comp_eq_overrun
        number of times an EQ mapped to completion events was
        overrun.
quota_exceeded_command: number of commands issued and failed due to quota
- quota_exceeded_command
        number of commands issued and failed due to quota exceeded.
- invalid_command
        number of commands issued and failed due to any reason other than quota
        exceeded.
invalid_command: number of commands issued and failed due to any reason
other than quota exceeded.
nic_receive_steering_discard: number of packets that completed RX flow
- nic_receive_steering_discard
        number of packets that completed RX flow
        steering but were discarded due to a mismatch in flow table.

User commands examples:
- Diagnose PF/VF vnic counters

- Diagnose PF/VF vnic counters::

        $ devlink health diagnose pci/0000:82:00.1 reporter vnic

- Diagnose representor vnic counters (performed by supplying devlink port of the
  representor, which can be obtained via devlink port command)
  representor, which can be obtained via devlink port command)::

        $ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic

NOTE: This command can run over all interfaces such as PF/VF and representor ports.
.. note::
   This command can run over all interfaces such as PF/VF and representor ports.
+1 −1
Original line number Diff line number Diff line
@@ -490,7 +490,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer,
				(u64)timestamp_low;
		break;
	default:
		if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
		if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
		    tracer_event->event_id <= tracer->str_db.first_string_trace +
					      tracer->str_db.num_string_trace) {
			tracer_event->type = TRACER_EVENT_TYPE_STRING;
+1 −0
Original line number Diff line number Diff line
@@ -327,6 +327,7 @@ struct mlx5e_params {
	unsigned int sw_mtu;
	int hard_mtu;
	bool ptp_rx;
	__be32 terminate_lkey_be;
};

static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params)
+29 −17
Original line number Diff line number Diff line
@@ -51,7 +51,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
	if (err)
		goto out;

	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
		buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
		port_buffer->buffer[i].lossy =
			MLX5_GET(bufferx_reg, buffer, lossy);
@@ -73,14 +73,24 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv,
			  port_buffer->buffer[i].lossy);
	}

	port_buffer->headroom_size = total_used;
	port_buffer->internal_buffers_size = 0;
	for (i = MLX5E_MAX_NETWORK_BUFFER; i < MLX5E_TOTAL_BUFFERS; i++) {
		buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]);
		port_buffer->internal_buffers_size +=
			MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz;
	}

	port_buffer->port_buffer_size =
		MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz;
	port_buffer->spare_buffer_size =
		port_buffer->port_buffer_size - total_used;

	mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n",
		  port_buffer->port_buffer_size,
	port_buffer->headroom_size = total_used;
	port_buffer->spare_buffer_size = port_buffer->port_buffer_size -
					 port_buffer->internal_buffers_size -
					 port_buffer->headroom_size;

	mlx5e_dbg(HW, priv,
		  "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n",
		  port_buffer->port_buffer_size, port_buffer->headroom_size,
		  port_buffer->internal_buffers_size,
		  port_buffer->spare_buffer_size);
out:
	kfree(out);
@@ -206,11 +216,11 @@ static int port_update_pool_cfg(struct mlx5_core_dev *mdev,
	if (!MLX5_CAP_GEN(mdev, sbcam_reg))
		return 0;

	for (i = 0; i < MLX5E_MAX_BUFFER; i++)
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++)
		lossless_buff_count += ((port_buffer->buffer[i].size) &&
				       (!(port_buffer->buffer[i].lossy)));

	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
		p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count);
		err = mlx5e_port_set_sbcm(mdev, 0, i,
					  MLX5_INGRESS_DIR,
@@ -293,7 +303,7 @@ static int port_set_buffer(struct mlx5e_priv *priv,
	if (err)
		goto out;

	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
		void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]);
		u64 size = port_buffer->buffer[i].size;
		u64 xoff = port_buffer->buffer[i].xoff;
@@ -351,7 +361,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer,
{
	int i;

	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
		if (port_buffer->buffer[i].lossy) {
			port_buffer->buffer[i].xoff = 0;
			port_buffer->buffer[i].xon  = 0;
@@ -408,7 +418,7 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev,
	int err;
	int i;

	for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
	for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
		prio_count = 0;
		lossy_count = 0;

@@ -432,11 +442,11 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev,
	}

	if (changed) {
		err = port_update_pool_cfg(mdev, port_buffer);
		err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
		if (err)
			return err;

		err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz);
		err = port_update_pool_cfg(mdev, port_buffer);
		if (err)
			return err;

@@ -515,7 +525,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,

	if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) {
		update_prio2buffer = true;
		for (i = 0; i < MLX5E_MAX_BUFFER; i++)
		for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++)
			mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n",
				  __func__, i, prio2buffer[i]);

@@ -530,7 +540,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
	}

	if (change & MLX5E_PORT_BUFFER_SIZE) {
		for (i = 0; i < MLX5E_MAX_BUFFER; i++) {
		for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) {
			mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]);
			if (!port_buffer.buffer[i].lossy && !buffer_size[i]) {
				mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n",
@@ -544,7 +554,9 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,

		mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used);

		if (total_used > port_buffer.port_buffer_size)
		if (total_used > port_buffer.headroom_size &&
		    (total_used - port_buffer.headroom_size) >
			    port_buffer.spare_buffer_size)
			return -EINVAL;

		update_buffer = true;
+5 −3
Original line number Diff line number Diff line
@@ -35,7 +35,8 @@
#include "en.h"
#include "port.h"

#define MLX5E_MAX_BUFFER 8
#define MLX5E_MAX_NETWORK_BUFFER 8
#define MLX5E_TOTAL_BUFFERS 10
#define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */

#define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \
@@ -60,8 +61,9 @@ struct mlx5e_bufferx_reg {
struct mlx5e_port_buffer {
	u32                       port_buffer_size;
	u32                       spare_buffer_size;
	u32                       headroom_size;
	struct mlx5e_bufferx_reg  buffer[MLX5E_MAX_BUFFER];
	u32                       headroom_size;	  /* Buffers 0-7 */
	u32                       internal_buffers_size;  /* Buffers 8-9 */
	struct mlx5e_bufferx_reg  buffer[MLX5E_MAX_NETWORK_BUFFER];
};

int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv,
Loading