Commit 2b5715fc authored by Nicolas Morey-Chaisemartin's avatar Nicolas Morey-Chaisemartin Committed by Jason Gunthorpe
Browse files

RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes

The current code computes a number of channels per SRP target and spreads
them equally across all online NUMA nodes.  Each channel is then assigned
a CPU within this node.

In the case of unbalanced, or even unpopulated nodes, some channels do not
get a CPU associated and thus do not get connected.  This causes the SRP
connection to fail.

This patch solves the issue by rewriting channel computation and
allocation:

- Drop channel to node/CPU association as it had no real effect on
  locality but added unnecessary complexity.

- Tweak the number of channels allocated to reduce CPU contention when
  possible:
  - Up to one channel per CPU (instead of up to 4 by node)
  - At least 4 channels per node, unless ch_count module parameter is
    used.

Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com


Signed-off-by: default avatarNicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Reviewed-by: default avatarBart Van Assche <bvanassche@acm.org>
Signed-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
parent 68ad4d1c
Loading
Loading
Loading
Loading
+45 −65
Original line number Diff line number Diff line
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev,
	struct srp_rdma_ch *ch;
	struct srp_device *srp_dev = host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	int ret, node_idx, node, cpu, i;
	int ret, i, ch_idx;
	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
	bool multich = false;
	uint32_t max_iu_len;
@@ -3753,39 +3753,23 @@ static ssize_t srp_create_target(struct device *dev,
		goto out;

	ret = -ENOMEM;
	if (target->ch_count == 0)
	if (target->ch_count == 0) {
		target->ch_count =
			max_t(unsigned int, num_online_nodes(),
			min(ch_count ?:
					  min(4 * num_online_nodes(),
				max(4 * num_online_nodes(),
				    ibdev->num_comp_vectors),
				  num_online_cpus()));
				num_online_cpus());
	}

	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
			     GFP_KERNEL);
	if (!target->ch)
		goto out;

	node_idx = 0;
	for_each_online_node(node) {
		const int ch_start = (node_idx * target->ch_count /
				      num_online_nodes());
		const int ch_end = ((node_idx + 1) * target->ch_count /
				    num_online_nodes());
		const int cv_start = node_idx * ibdev->num_comp_vectors /
				     num_online_nodes();
		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
				   num_online_nodes();
		int cpu_idx = 0;

		for_each_online_cpu(cpu) {
			if (cpu_to_node(cpu) != node)
				continue;
			if (ch_start + cpu_idx >= ch_end)
				continue;
			ch = &target->ch[ch_start + cpu_idx];
	for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
		ch = &target->ch[ch_idx];
		ch->target = target;
			ch->comp_vector = cv_start == cv_end ? cv_start :
				cv_start + cpu_idx % (cv_end - cv_start);
		ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
		spin_lock_init(&ch->lock);
		INIT_LIST_HEAD(&ch->free_tx);
		ret = srp_new_cm_id(ch);
@@ -3812,9 +3796,9 @@ static ssize_t srp_create_target(struct device *dev,
					target->ib_cm.orig_dgid.raw);
			shost_printk(KERN_ERR, target->scsi_host,
				PFX "Connection %d/%d to %s failed\n",
					     ch_start + cpu_idx,
				ch_idx,
				target->ch_count, dst);
				if (node_idx == 0 && cpu_idx == 0) {
			if (ch_idx == 0) {
				goto free_ch;
			} else {
				srp_free_ch_ib(target, ch);
@@ -3823,11 +3807,7 @@ static ssize_t srp_create_target(struct device *dev,
				goto connected;
			}
		}

		multich = true;
			cpu_idx++;
		}
		node_idx++;
	}

connected: