Commit aaa55706 authored by David S. Miller's avatar David S. Miller
Browse files

Merge tag 'mlx5-updates-2021-10-18' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux



Saeed Mahameed says:

mlx5-updates-2021-10-18

Maor Maor Gottlieb says:
========================
Use hash to select the affinity port in VF LAG

Current VF LAG architecture is based on QP association with a port.
QP must be created after LAG is enabled to allow association with non-native port.
VM Packets going on slow-path to eSwicth manager (SW path or hairpin) will be transmitted
through a different QP than the VM. This means that Different packets of the same flow might
egress from different physical ports.

This patch-set solves this issue by moving the port selection to be based on the hash function
defined by the bond.

When the device is moved to VF LAG mode, the driver creates TTC (traffic type classifier) flow
tables in order to classify the packet and steer it to the relevant hash function. Similar to what
is done in the mlx5 RSS implementation.

Each rule in the TTC table, forwards the packet to port selection flow table which has one hash
split flow group which contains two "catch all" flow table entries. Each entry point to the
relative uplink port. As shown below:

		-------------------
		| FT              |
TTC rule ->	|     ----------- |
		|   FG|   FTE --|-|-----> uplink of port #1
		|     |   FTE --|-|-----> uplink of port #2
		|     ----------- |
		-------------------

Hash split flow group is flow group that created as type of HASH_SPLIT and associated with match definer.
The match definer define the fields which included in the hash calculation.

The driver creates the match definer according to the xmit hash policy of the bond driver.

Patches overview:
========================

Minor E-Switch updates:
- Patch #12, dynamic  allocation of dest array
- Patch #13, increase number of forward destinations to 32

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 4a6c396e d40bfedd
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
		fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
		fw_reset.o qos.o lib/tout.o
@@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag_mp.o lib/geneve.o lib/port_tun.o \
mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag/mp.o lag/port_sel.o lib/geneve.o lib/port_tun.o \
					en_rep.o en/rep/bond.o en/mod_hdr.o \
					en/mapping.o
mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
+3 −0
Original line number Diff line number Diff line
@@ -235,6 +235,9 @@ const char *parse_fs_dst(struct trace_seq *p,
	const char *ret = trace_seq_buffer_ptr(p);

	switch (dst->type) {
	case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
		trace_seq_printf(p, "uplink\n");
		break;
	case MLX5_FLOW_DESTINATION_TYPE_VPORT:
		trace_seq_printf(p, "vport=%u\n", dst->vport.num);
		break;
+1 −1
Original line number Diff line number Diff line
@@ -433,7 +433,7 @@ enum mlx5_flow_match_level {
};

/* current maximum for flow based vport multicasting */
#define MLX5_MAX_FLOW_FWD_VPORTS 2
#define MLX5_MAX_FLOW_FWD_VPORTS 32

enum {
	MLX5_ESW_DEST_ENCAP         = BIT(0),
+14 −2
Original line number Diff line number Diff line
@@ -482,12 +482,12 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
				struct mlx5_flow_spec *spec,
				struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	bool split = !!(esw_attr->split_count);
	struct mlx5_vport_tbl_attr fwd_attr;
	struct mlx5_flow_destination *dest;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_table *fdb;
	int i = 0;
@@ -495,6 +495,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
		return ERR_PTR(-EOPNOTSUPP);

	dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
	if (!dest)
		return ERR_PTR(-ENOMEM);

	flow_act.action = attr->action;
	/* if per flow vlan pop/push is emulated, don't set that into the firmware */
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
@@ -574,6 +578,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
	else
		atomic64_inc(&esw->offloads.num_flows);

	kfree(dest);
	return rule;

err_add_rule:
@@ -584,6 +589,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
err_esw_get:
	esw_cleanup_dests(esw, attr);
err_create_goto_table:
	kfree(dest);
	return rule;
}

@@ -592,16 +598,20 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
			  struct mlx5_flow_spec *spec,
			  struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
	struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5_fs_chains *chains = esw_chains(esw);
	struct mlx5_vport_tbl_attr fwd_attr;
	struct mlx5_flow_destination *dest;
	struct mlx5_flow_table *fast_fdb;
	struct mlx5_flow_table *fwd_fdb;
	struct mlx5_flow_handle *rule;
	int i, err = 0;

	dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL);
	if (!dest)
		return ERR_PTR(-ENOMEM);

	fast_fdb = mlx5_chains_get_table(chains, attr->chain, attr->prio, 0);
	if (IS_ERR(fast_fdb)) {
		rule = ERR_CAST(fast_fdb);
@@ -654,6 +664,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,

	atomic64_inc(&esw->offloads.num_flows);

	kfree(dest);
	return rule;
err_chain_src_rewrite:
	esw_put_dest_tables_loop(esw, attr, 0, i);
@@ -661,6 +672,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
err_get_fwd:
	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
err_get_fast:
	kfree(dest);
	return rule;
}

+65 −1
Original line number Diff line number Diff line
@@ -185,6 +185,20 @@ static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
	return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
}

static int
mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
				    int definer_id)
{
	return 0;
}

static int
mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns,
				   u16 format_id, u32 *match_mask)
{
	return 0;
}

static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
				   struct mlx5_flow_table *ft, u32 underlay_qpn,
				   bool disconnect)
@@ -563,8 +577,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
				id = dst->dest_attr.ft->id;
				break;
			case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
				id = dst->dest_attr.vport.num;
				MLX5_SET(dest_format_struct, in_dests,
					 destination_eswitch_owner_vhca_id_valid,
					 !!(dst->dest_attr.vport.flags &
@@ -572,6 +586,12 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
				MLX5_SET(dest_format_struct, in_dests,
					 destination_eswitch_owner_vhca_id,
					 dst->dest_attr.vport.vhca_id);
				if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
					/* destination_id is reserved */
					id = 0;
					break;
				}
				id = dst->dest_attr.vport.num;
				if (extended_dest &&
				    dst->dest_attr.vport.pkt_reformat) {
					MLX5_SET(dest_format_struct, in_dests,
@@ -909,6 +929,45 @@ static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
	mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
}

static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
					  int definer_id)
{
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
		 MLX5_OBJ_TYPE_MATCH_DEFINER);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);

	return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
					 u16 format_id, u32 *match_mask)
{
	u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {};
	u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
	struct mlx5_core_dev *dev = ns->dev;
	void *ptr;
	int err;

	MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_MATCH_DEFINER);

	ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
	MLX5_SET(match_definer, ptr, format_id, format_id);

	ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask);
	memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask));

	err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out);
	return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
}

static const struct mlx5_flow_cmds mlx5_flow_cmds = {
	.create_flow_table = mlx5_cmd_create_flow_table,
	.destroy_flow_table = mlx5_cmd_destroy_flow_table,
@@ -923,6 +982,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
	.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
	.modify_header_alloc = mlx5_cmd_modify_header_alloc,
	.modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
	.create_match_definer = mlx5_cmd_create_match_definer,
	.destroy_match_definer = mlx5_cmd_destroy_match_definer,
	.set_peer = mlx5_cmd_stub_set_peer,
	.create_ns = mlx5_cmd_stub_create_ns,
	.destroy_ns = mlx5_cmd_stub_destroy_ns,
@@ -942,6 +1003,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
	.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
	.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
	.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
	.create_match_definer = mlx5_cmd_stub_create_match_definer,
	.destroy_match_definer = mlx5_cmd_stub_destroy_match_definer,
	.set_peer = mlx5_cmd_stub_set_peer,
	.create_ns = mlx5_cmd_stub_create_ns,
	.destroy_ns = mlx5_cmd_stub_destroy_ns,
@@ -969,6 +1032,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
	case FS_FT_NIC_TX:
	case FS_FT_RDMA_RX:
	case FS_FT_RDMA_TX:
	case FS_FT_PORT_SEL:
		return mlx5_fs_cmd_get_fw_cmds();
	default:
		return mlx5_fs_cmd_get_stub_cmds();
Loading