Commit 334083f7 authored by Alex Kogan, committed by Wei Li
Browse files

locking/qspinlock: Avoid moving certain threads between waiting queues in CNA

maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8T8XV

Reference: https://lore.kernel.org/linux-arm-kernel/20210514200743.3026725-6-alex.kogan@oracle.com



--------------------------------

Prohibit moving certain threads (e.g., in irq and nmi contexts)
to the secondary queue. Those prioritized threads will always stay
in the primary queue, and so will have a shorter wait time for the lock.

Signed-off-by: Alex Kogan <alex.kogan@oracle.com>
Reviewed-by: Steve Sistare <steven.sistare@oracle.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Wei Li <liwei391@huawei.com>
parent 7192e563
Loading
Loading
Loading
Loading
+15 −3
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include <linux/topology.h>
#include <linux/sched/clock.h>
#include <linux/moduleparam.h>
#include <linux/sched/rt.h>

/*
 * Implement a NUMA-aware version of MCS (aka CNA, or compact NUMA-aware lock).
@@ -37,7 +38,8 @@
 * running on the same NUMA node. If it is not, that waiter is detached from the
 * main queue and moved into the tail of the secondary queue. This way, we
 * gradually filter the primary queue, leaving only waiters running on the same
 * preferred NUMA node.
 * preferred NUMA node. Note that certain prioritized waiters (e.g., in
 * irq and nmi contexts) are excluded from being moved to the secondary queue.
 *
 * We change the NUMA node preference after a waiter at the head of the
 * secondary queue spins for a certain amount of time (1ms, by default).
@@ -53,6 +55,8 @@

#define FLUSH_SECONDARY_QUEUE	1

#define CNA_PRIORITY_NODE      0xffff

struct cna_node {
	struct mcs_spinlock	mcs;
	u16			numa_node;
@@ -111,9 +115,10 @@ static int __init cna_init_nodes(void)

static __always_inline void cna_init_node(struct mcs_spinlock *node)
{
	bool priority = !in_task() || irqs_disabled() || rt_task(current);
	struct cna_node *cn = (struct cna_node *)node;

	cn->numa_node = cn->real_numa_node;
	cn->numa_node = priority ? CNA_PRIORITY_NODE : cn->real_numa_node;
	cn->start_time = 0;
}

@@ -252,7 +257,7 @@ static int cna_order_queue(struct mcs_spinlock *node)
	numa_node = cn->numa_node;
	next_numa_node = ((struct cna_node *)next)->numa_node;

	if (next_numa_node != numa_node) {
	if (next_numa_node != numa_node && next_numa_node != CNA_PRIORITY_NODE) {
		struct mcs_spinlock *nnext = READ_ONCE(next->next);

		if (nnext)
@@ -272,6 +277,13 @@ static __always_inline u32 cna_wait_head_or_lock(struct qspinlock *lock,
	struct cna_node *cn = (struct cna_node *)node;

	if (!cn->start_time || !intra_node_threshold_reached(cn)) {
		/*
		 * We are at the head of the wait queue, no need to use
		 * the fake NUMA node ID.
		 */
		if (cn->numa_node == CNA_PRIORITY_NODE)
			cn->numa_node = cn->real_numa_node;

		/*
		 * Try and put the time otherwise spent spin waiting on
		 * _Q_LOCKED_PENDING_MASK to use by sorting our lists.