Unverified Commit a711cdaf authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!3320 Make the rcache depot scale better

Merge Pull Request from: @ci-robot 
 
PR sync from: Zhang Zekun <zhangzekun11@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/IZ5NKOTTMTI6SR3WLRBGI4RKB2PE7QUF/ 
Recently we have met a problem when running an FIO test.
On our Kunpeng server with 320 cores, about 80% of the CPUs reach
100% usage, and a soft lockup appears in the kernel log,
which shows the system is stuck in __alloc_and_insert_iova_range().
Both the call trace and the high CPU usage imply that the
iova_rcache performs poorly when allocating IOVAs.

A similar problem was addressed earlier this year; the solution
then was simply to enlarge MAX_GLOBAL_MAGS to 128, but that value
needs to grow with the number of CPU cores, and it is hard to pick
an accurate value for a specific machine. So it is better to use
the solution from the upstream Linux community: replace the array
in iova_rcache->depot with a list. An iova_magazine is pushed onto
the depot list when the local cpu_rcache is full, and
schedule_delayed_work() is used to free it after 100ms. The minimum
length of the depot list is the number of online CPUs.

This patch performs well. The patch set uses a lot of timers to
track the iova_magazines, and the delay of those timers may
increase when the system is under heavy load, but this should not
cause problems, as timers themselves allow some inaccuracy in the
delay.

v2:
- change a comment in patch 5/6, no code change.

We need to merge "iommu/iova: change IOVA_MAG_SIZE to 127 to save
memory" first to resolve a compile error:

error: static assertion failed: "!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1))"

Feng Tang (1):
  iommu/iova: change IOVA_MAG_SIZE to 127 to save memory

Zhang Zekun (5):
  Revert "iommu/iova: move IOVA_MAX_GLOBAL_MAGS outside of
    IOMMU_SUPPORT"
  Revert "config: enable set the max iova mag size to 128"
  Revert "iommu/iova: increase the iova_rcache depot max size to 128"
  iommu/iova: Make the rcache depot scale better
  iommu/iova: Manage the depot list size


-- 
2.17.1
 
https://gitee.com/openeuler/kernel/issues/I8KS9A 
 
Link:https://gitee.com/openeuler/kernel/pulls/3320

 

Reviewed-by: default avatarWeilong Chen <chenweilong@huawei.com>
Signed-off-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
parents 08b1347b 4dfc6967
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -5994,7 +5994,6 @@ CONFIG_ARM_SMMU_V3_PM=y
# CONFIG_QCOM_IOMMU is not set
# CONFIG_VIRTIO_IOMMU is not set
CONFIG_SMMU_BYPASS_DEV=y
CONFIG_IOVA_MAX_GLOBAL_MAGS=128

#
# Remoteproc drivers
+0 −10
Original line number Diff line number Diff line
@@ -439,13 +439,3 @@ config SMMU_BYPASS_DEV
	  upstreamed in mainline.

endif # IOMMU_SUPPORT

config IOVA_MAX_GLOBAL_MAGS
	int "Set the max iova global magzines in iova rcache"
	range 16 2048
	default "32"
	help
	  Iova rcache global magizine is shared among every cpu. The size of
	  it can be a bottle neck when lots of cpus are contending to use it.
	  If you are suffering from the speed of allocing iova with more than
	  128 cpus, try to tune this config larger.
+60 −20
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/workqueue.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL
@@ -792,12 +793,24 @@ split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128
/*
 * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
 * assure size of 'iova_magazine' to be 1024 bytes, so that no memory
 * will be wasted. Since only full magazines are inserted into the depot,
 * we don't need to waste PFN capacity on a separate list head either.
 */
#define IOVA_MAG_SIZE 127

#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)

struct iova_magazine {
	/*
	 * 'size' is the fill level while the magazine sits in a per-CPU
	 * rcache; 'next' links full magazines on the rcache depot list.
	 * Only full magazines go on the depot, so the two can safely
	 * share storage (see iova_depot_pop(), which restores 'size' to
	 * IOVA_MAG_SIZE on removal).
	 */
	union {
		unsigned long size;
		struct iova_magazine *next;
	};
	unsigned long pfns[IOVA_MAG_SIZE];
};
/* Keep the magazine a power-of-2 size (1024 bytes with IOVA_MAG_SIZE
 * of 127) so kmalloc wastes no memory on it. */
static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));

struct iova_cpu_rcache {
	spinlock_t lock;
@@ -877,6 +890,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
	mag->pfns[mag->size++] = pfn;
}

/*
 * Unlink and return the magazine at the head of the depot list.
 * Caller must hold rcache->lock.
 */
static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
{
	struct iova_magazine *mag = rcache->depot;

	rcache->depot = mag->next;
	/*
	 * 'size' shares storage with 'next', and only full magazines are
	 * ever pushed to the depot, so restore the full size here.
	 */
	mag->size = IOVA_MAG_SIZE;
	rcache->depot_size--;
	return mag;
}

/*
 * Push a (full) magazine onto the head of the depot list.
 * Caller must hold rcache->lock. 'mag->next' overwrites 'mag->size'
 * (they share a union), which is fine since depot magazines are full.
 */
static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
{
	mag->next = rcache->depot;
	rcache->depot = mag;
	rcache->depot_size++;
}

/*
 * Delayed work that trims the depot back down towards num_online_cpus()
 * magazines. Each invocation frees at most one magazine (outside the
 * lock) and re-arms itself while the depot remains above the threshold,
 * so excess magazines drain gradually rather than in one burst.
 */
static void iova_depot_work_func(struct work_struct *work)
{
	struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
	struct iova_magazine *mag = NULL;
	unsigned long flags;

	spin_lock_irqsave(&rcache->lock, flags);
	if (rcache->depot_size > num_online_cpus())
		mag = iova_depot_pop(rcache);
	spin_unlock_irqrestore(&rcache->lock, flags);

	if (mag) {
		/* Free the PFNs and the magazine itself without the lock held. */
		iova_magazine_free_pfns(mag, rcache->iovad);
		iova_magazine_free(mag);
		/* More may still need trimming; check again after the delay. */
		schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
	}
}

static void init_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
@@ -887,7 +935,8 @@ static void init_iova_rcaches(struct iova_domain *iovad)
	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->iovad = iovad;
		INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
		if (WARN_ON(!rcache->cpu_rcaches))
			continue;
@@ -910,7 +959,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;
@@ -928,13 +976,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			iova_depot_push(rcache, cpu_rcache->loaded);
			spin_unlock(&rcache->lock);
			schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
@@ -946,11 +990,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

@@ -988,9 +1027,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
		if (rcache->depot) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			cpu_rcache->loaded = iova_depot_pop(rcache);
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
@@ -1029,7 +1068,7 @@ static void free_iova_rcaches(struct iova_domain *iovad)
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
@@ -1039,8 +1078,9 @@ static void free_iova_rcaches(struct iova_domain *iovad)
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
		cancel_delayed_work_sync(&rcache->work);
		while (rcache->depot)
			iova_magazine_free(iova_depot_pop(rcache));
	}
}

+4 −3
Original line number Diff line number Diff line
@@ -26,13 +26,14 @@ struct iova_magazine;
struct iova_cpu_rcache;

#define IOVA_RANGE_CACHE_MAX_SIZE 6	/* log of max cached IOVA range size (in pages) */
#define MAX_GLOBAL_MAGS CONFIG_IOVA_MAX_GLOBAL_MAGS	/* magazines per bin */

struct iova_rcache {
	spinlock_t lock;
	unsigned long depot_size;
	struct iova_magazine *depot[MAX_GLOBAL_MAGS];
	unsigned int depot_size;
	struct iova_magazine *depot;
	struct iova_cpu_rcache __percpu *cpu_rcaches;
	struct iova_domain *iovad;
	struct delayed_work work;
};

struct iova_domain;