Commit 7f46a0b7 authored by Mark Bloch, committed by Saeed Mahameed
Browse files

net/mlx5: Lag, add debugfs to query hardware lag state



Lag state has become very complicated with many modes, flags, types and
port selection methods, and future work will add additional features.

Add a debugfs to query the current lag state. A new directory named "lag"
will be created under the mlx5 debugfs directory. As the driver has
debugfs per pci function the location will be: <debugfs>/mlx5/<BDF>/lag

For example:
/sys/kernel/debug/mlx5/0000:08:00.0/lag

The following files are exposed:

- state: Returns "active" or "disabled". If "active" it means hardware
         lag is active.

- members: Returns the BDFs of all the members of lag object.

- type: Returns the type of the lag currently configured. Valid only
	if hardware lag is active.
	* "roce" - Members are bare metal PFs.
	* "switchdev" - Members are in switchdev mode.
	* "multipath" - ECMP offloads.

- port_sel_mode: Returns the egress port selection method, valid
		 only if hardware lag is active.
		 * "queue_affinity" - Egress port is selected by
		   the QP/SQ affinity.
		 * "hash" - Egress port is selected by hash done on
		   each packet. Controlled by: xmit_hash_policy of the
		   bond device.
- flags: Returns flags that are specific per lag @type. Valid only if
	 hardware lag is active.
	 * "shared_fdb" - "on" or "off", if "on" single FDB is used.

- mapping: Returns the mapping which is used to select egress port.
	   Valid only if hardware lag is active.
	   If @port_sel_mode is "hash" returns the active egress ports.
	   The hash result will select only active ports.
	   If @port_sel_mode is "queue_affinity" returns the mapping
	   between the configured port affinity of the QP/SQ and actual
	   egress port. For example:
	   * 1:1 - Mapping means if the configured affinity is port 1
	           traffic will egress via port 1.
	   * 1:2 - Mapping means if the configured affinity is port 1
		   traffic will egress via port 2. This can happen
		   if port 1 is down or in active/backup mode and port 1
		   is backup.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 352899f3
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
		fw_reset.o qos.o lib/tout.o
+173 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "lag.h"

/*
 * Map the lag type bits in ldev->flags to a short name.
 *
 * The bits are tested in the order RoCE, SR-IOV, multipath and the first
 * one that is set decides the result. Returns NULL when none is set.
 */
static char *get_str_mode_type(struct mlx5_lag *ldev)
{
	char *name = NULL;

	if (ldev->flags & MLX5_LAG_FLAG_ROCE)
		name = "roce";
	else if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
		name = "switchdev";
	else if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
		name = "multipath";

	return name;
}

/*
 * debugfs "type" show handler.
 *
 * Prints the lag type name followed by a newline. Returns -EINVAL when
 * lag is not active or when get_str_mode_type() finds no matching type.
 */
static int type_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *mdev = file->private;
	struct mlx5_lag *ldev = mdev->priv.lag;
	char *type_name;

	/* Sample the state under the lag lock; print only after dropping it. */
	mutex_lock(&ldev->lock);
	type_name = __mlx5_lag_is_active(ldev) ? get_str_mode_type(ldev) : NULL;
	mutex_unlock(&ldev->lock);

	if (!type_name)
		return -EINVAL;

	seq_printf(file, "%s\n", type_name);
	return 0;
}

static int port_sel_mode_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *dev = file->private;
	struct mlx5_lag *ldev;
	int ret = 0;
	char *mode;

	ldev = dev->priv.lag;
	mutex_lock(&ldev->lock);
	if (__mlx5_lag_is_active(ldev))
		mode = get_str_port_sel_mode(ldev->flags);
	else
		ret = -EINVAL;
	mutex_unlock(&ldev->lock);
	if (ret || !mode)
		return ret;

	seq_printf(file, "%s\n", mode);
	return 0;
}

/*
 * debugfs "state" show handler.
 *
 * Prints "active" when __mlx5_lag_is_active() reports true and "disabled"
 * otherwise. Always returns 0.
 */
static int state_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *mdev = file->private;
	struct mlx5_lag *ldev = mdev->priv.lag;
	const char *state;

	/* Only the activity check needs the lag lock. */
	mutex_lock(&ldev->lock);
	state = __mlx5_lag_is_active(ldev) ? "active" : "disabled";
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s\n", state);
	return 0;
}

/*
 * debugfs "flags" show handler.
 *
 * Prints "shared_fdb:on" or "shared_fdb:off" according to
 * ldev->shared_fdb. Returns -EINVAL when lag is not active.
 */
static int flags_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *mdev = file->private;
	struct mlx5_lag *ldev = mdev->priv.lag;
	const char *fdb_state;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}
	/* Capture the flag while still holding the lock. */
	fdb_state = ldev->shared_fdb ? "on" : "off";
	mutex_unlock(&ldev->lock);

	seq_printf(file, "%s:%s\n", "shared_fdb", fdb_state);
	return 0;
}

/*
 * debugfs "mapping" show handler.
 *
 * When MLX5_LAG_FLAG_HASH_BASED is set, prints one line per port reported
 * by mlx5_infer_tx_enabled(), as the reported value plus one. Otherwise
 * prints one "<index + 1>:<v2p_map[index]>" line per lag port.
 * Returns -EINVAL when lag is not active.
 */
static int mapping_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *mdev = file->private;
	struct mlx5_lag *ldev = mdev->priv.lag;
	u8 ports[MLX5_MAX_PORTS] = {};
	bool hash_based;
	int count;
	int idx;

	mutex_lock(&ldev->lock);
	if (!__mlx5_lag_is_active(ldev)) {
		mutex_unlock(&ldev->lock);
		return -EINVAL;
	}

	/* Snapshot the mapping into a local array so printing is lock-free. */
	hash_based = !!(ldev->flags & MLX5_LAG_FLAG_HASH_BASED);
	if (hash_based) {
		mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
				      &count);
	} else {
		count = ldev->ports;
		for (idx = 0; idx < count; idx++)
			ports[idx] = ldev->v2p_map[idx];
	}
	mutex_unlock(&ldev->lock);

	for (idx = 0; idx < count; idx++) {
		if (hash_based)
			seq_printf(file, "%d\n", ports[idx] + 1);
		else
			seq_printf(file, "%d:%d\n", idx + 1, ports[idx]);
	}

	return 0;
}

/*
 * debugfs "members" show handler.
 *
 * Walks the ldev->pf[] slots up to ldev->ports and prints the device name
 * of every slot that has a device attached, one per line. Always
 * returns 0.
 */
static int members_show(struct seq_file *file, void *priv)
{
	struct mlx5_core_dev *mdev = file->private;
	struct mlx5_lag *ldev = mdev->priv.lag;
	int idx;

	/* The whole walk, including the printing, runs under the lag lock. */
	mutex_lock(&ldev->lock);
	for (idx = 0; idx < ldev->ports; idx++) {
		if (ldev->pf[idx].dev)
			seq_printf(file, "%s\n",
				   dev_name(ldev->pf[idx].dev->device));
	}
	mutex_unlock(&ldev->lock);

	return 0;
}

/*
 * One invocation per <name>_show() handler above; these provide the
 * <name>_fops objects that mlx5_ldev_add_debugfs() passes to
 * debugfs_create_file().
 */
DEFINE_SHOW_ATTRIBUTE(type);
DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
DEFINE_SHOW_ATTRIBUTE(state);
DEFINE_SHOW_ATTRIBUTE(flags);
DEFINE_SHOW_ATTRIBUTE(mapping);
DEFINE_SHOW_ATTRIBUTE(members);

/*
 * Create the "lag" debugfs directory under the device's debugfs root and
 * populate it with the read-only lag query files.
 *
 * The directory dentry is stored in dev->priv.dbg.lag_debugfs.
 */
void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
{
	struct dentry *lag_dir;

	lag_dir = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
	dev->priv.dbg.lag_debugfs = lag_dir;

	/* Each file gets @dev as private data for its show handler. */
	debugfs_create_file("type", 0444, lag_dir, dev, &type_fops);
	debugfs_create_file("port_sel_mode", 0444, lag_dir, dev,
			    &port_sel_mode_fops);
	debugfs_create_file("state", 0444, lag_dir, dev, &state_fops);
	debugfs_create_file("flags", 0444, lag_dir, dev, &flags_fops);
	debugfs_create_file("mapping", 0444, lag_dir, dev, &mapping_fops);
	debugfs_create_file("members", 0444, lag_dir, dev, &members_fops);
}

/*
 * Remove a lag debugfs directory and everything beneath it.
 *
 * @dbg: directory dentry to remove; mlx5_ldev_add_debugfs() stores the
 *       one it creates in dev->priv.dbg.lag_debugfs.
 */
void mlx5_ldev_remove_debugfs(struct dentry *dbg)
{
	debugfs_remove_recursive(dbg);
}
+8 −3
Original line number Diff line number Diff line
@@ -120,7 +120,7 @@ static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
	}
}

static void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled)
{
	int i;
@@ -454,7 +454,7 @@ static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
	return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
}

static char *get_str_port_sel_mode(u8 flags)
char *get_str_port_sel_mode(u8 flags)
{
	if (flags &  MLX5_LAG_FLAG_HASH_BASED)
		return "hash";
@@ -1106,6 +1106,10 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
	if (!ldev)
		return;

	/* mdev is being removed, might as well remove debugfs
	 * as early as possible.
	 */
	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
@@ -1137,6 +1141,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_add_debugfs(dev);
}

void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
+9 −0
Original line number Diff line number Diff line
@@ -4,6 +4,8 @@
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__

#include <linux/debugfs.h>

#define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h"
#include "mp.h"
@@ -90,4 +92,11 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev);

char *get_str_port_sel_mode(u8 flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
			   u8 *ports, int *num_enabled);

void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
void mlx5_ldev_remove_debugfs(struct dentry *dbg);

#endif /* __MLX5_LAG_H__ */
+1 −0
Original line number Diff line number Diff line
@@ -558,6 +558,7 @@ struct mlx5_debugfs_entries {
	struct dentry *cq_debugfs;
	struct dentry *cmdif_debugfs;
	struct dentry *pages_debugfs;
	struct dentry *lag_debugfs;
};

struct mlx5_ft_pool;