Unverified Commit 070dcfd3 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4027 [OLK-6.6] Intel RDT non-contiguous CBM support

Merge Pull Request from: @xiaochenshen 
 
**About Intel RDT non-contiguous CBM support:**

GNR and SRF support L3/L2 non-contiguous way masks. Linux kernel changes are needed to support this.

Legacy RDT only supports contiguous bits in the L3/L2 CAT Cache Bit Mask (CBM) when allocating L3/L2 cache. For example, binary 0b111 is a valid bit mask but binary 0b101 is an invalid bit mask. This is a limitation of legacy RDT hardware. It limits the user's ability to allocate L3/L2 cache efficiently and causes poor performance.

With non-contiguous bits in L3/L2 CAT, we can allocate L3/L2 cache in various ways and utilize the cache efficiently and improve user application performance.

It fixes the issue:
https://gitee.com/openeuler/intel-kernel/issues/I8WO9B

**About the patches:**
There are 4 backported upstream patches:
aaa5fa35743ab9f0726568611a85e3e15349b9bf Documentation/x86: Document resctrl's new sparse_masks
4dba8f10b8fef9c5b0f9ed83dd1af91a1795ead1 x86/resctrl: Add sparse_masks file in info
0e3cd31f6e9074886dea5a999bfcc563d144e7de x86/resctrl: Enable non-contiguous CBMs in Intel CAT
39c6eed1f61594f737160e498d29673edbd9eefd x86/resctrl: Rename arch_has_sparse_bitmaps

**Passed tests:**
Intel RDT non-contiguous CBM CAT tests: passed. 
 
Link:https://gitee.com/openeuler/kernel/pulls/4027

 

Reviewed-by: Jason Zeng <jason.zeng@intel.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 652b917c 54c9c9bb
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -124,6 +124,13 @@ related to allocation:
			"P":
			      Corresponding region is pseudo-locked. No
			      sharing allowed.
"sparse_masks":
		Indicates if non-contiguous 1s value in CBM is supported.

			"0":
			      Only contiguous 1s value in CBM is supported.
			"1":
			      Non-contiguous 1s value in CBM is supported.

Memory bandwidth(MB) subdirectory contains the following files
with respect to allocation:
@@ -445,12 +452,13 @@ For cache resources we describe the portion of the cache that is available
for allocation using a bitmask. The maximum value of the mask is defined
by each cpu model (and may be different for different cache levels). It
is found using CPUID, but is also provided in the "info" directory of
the resctrl file system in "info/{resource}/cbm_mask". Intel hardware
the resctrl file system in "info/{resource}/cbm_mask". Some Intel hardware
requires that these masks have all the '1' bits in a contiguous block. So
0x3, 0x6 and 0xC are legal 4-bit masks with two bits set, but 0x5, 0x9
and 0xA are not.  On a system with a 20-bit mask each bit represents 5%
of the capacity of the cache. You could partition the cache into four
equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.
and 0xA are not. Check /sys/fs/resctrl/info/{resource}/sparse_masks
to see whether non-contiguous 1s values are supported. On a system with a 20-bit mask
each bit represents 5% of the capacity of the cache. You could partition
the cache into four equal parts with masks: 0x1f, 0x3e0, 0x7c00, 0xf8000.

Memory bandwidth Allocation and monitoring
==========================================
+7 −4
Original line number Diff line number Diff line
@@ -152,6 +152,7 @@ static inline void cache_alloc_hsw_probe(void)
	r->cache.cbm_len = 20;
	r->cache.shareable_bits = 0xc0000;
	r->cache.min_cbm_bits = 2;
	r->cache.arch_has_sparse_bitmasks = false;
	r->alloc_capable = true;

	rdt_alloc_capable = true;
@@ -267,15 +268,18 @@ static void rdt_get_cache_alloc_cfg(int idx, struct rdt_resource *r)
{
	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
	union cpuid_0x10_1_eax eax;
	union cpuid_0x10_x_ecx ecx;
	union cpuid_0x10_x_edx edx;
	u32 ebx, ecx;
	u32 ebx;

	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx, &edx.full);
	cpuid_count(0x00000010, idx, &eax.full, &ebx, &ecx.full, &edx.full);
	hw_res->num_closid = edx.split.cos_max + 1;
	r->cache.cbm_len = eax.split.cbm_len + 1;
	r->default_ctrl = BIT_MASK(eax.split.cbm_len + 1) - 1;
	r->cache.shareable_bits = ebx & r->default_ctrl;
	r->data_width = (r->cache.cbm_len + 3) / 4;
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		r->cache.arch_has_sparse_bitmasks = ecx.split.noncont;
	r->alloc_capable = true;
}

@@ -872,7 +876,6 @@ static __init void rdt_init_res_defs_intel(void)

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmaps = false;
			r->cache.arch_has_per_cpu_cfg = false;
			r->cache.min_cbm_bits = 1;
		} else if (r->rid == RDT_RESOURCE_MBA) {
@@ -892,7 +895,7 @@ static __init void rdt_init_res_defs_amd(void)

		if (r->rid == RDT_RESOURCE_L3 ||
		    r->rid == RDT_RESOURCE_L2) {
			r->cache.arch_has_sparse_bitmaps = true;
			r->cache.arch_has_sparse_bitmasks = true;
			r->cache.arch_has_per_cpu_cfg = true;
			r->cache.min_cbm_bits = 0;
		} else if (r->rid == RDT_RESOURCE_MBA) {
+8 −6
Original line number Diff line number Diff line
@@ -87,10 +87,12 @@ int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,

/*
 * Check whether a cache bit mask is valid.
 * For Intel the SDM says:
 *	Please note that all (and only) contiguous '1' combinations
 *	are allowed (e.g. FFFFH, 0FF0H, 003CH, etc.).
 * Additionally Haswell requires at least two bits set.
 * On Intel CPUs, non-contiguous 1s value support is indicated by CPUID:
 *   - CPUID.0x10.1:ECX[3]: L3 non-contiguous 1s value supported if 1
 *   - CPUID.0x10.2:ECX[3]: L2 non-contiguous 1s value supported if 1
 *
 * Haswell does not support a non-contiguous 1s value and additionally
 * requires at least two bits set.
 * AMD allows non-contiguous bitmasks.
 */
static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
@@ -113,8 +115,8 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
	first_bit = find_first_bit(&val, cbm_len);
	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);

	/* Are non-contiguous bitmaps allowed? */
	if (!r->cache.arch_has_sparse_bitmaps &&
	/* Are non-contiguous bitmasks allowed? */
	if (!r->cache.arch_has_sparse_bitmasks &&
	    (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)) {
		rdt_last_cmd_printf("The mask %lx has non-consecutive 1-bits\n", val);
		return false;
+9 −0
Original line number Diff line number Diff line
@@ -492,6 +492,15 @@ union cpuid_0x10_3_eax {
	unsigned int full;
};

/*
 * CPUID.(EAX=10H, ECX=ResID).ECX
 *
 * Overlay for the ECX output of the resource allocation CPUID leaf:
 * "full" holds the raw 32-bit register value, "split" names the
 * individual bit fields within it.
 */
union cpuid_0x10_x_ecx {
	struct {
		/* Leading bits that are not interpreted here. */
		unsigned int reserved:3;
		/* Set when non-contiguous 1s values in the CBM are supported. */
		unsigned int noncont:1;
	} split;
	unsigned int full;
};

/* CPUID.(EAX=10H, ECX=ResID).EDX */
union cpuid_0x10_x_edx {
	struct {
+18 −0
Original line number Diff line number Diff line
@@ -1117,6 +1117,17 @@ static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
	}
}

/*
 * seq_show handler for the "sparse_masks" info file.
 *
 * Looks up the schema stored in the parent kernfs node's private data,
 * follows it to the resource, and prints that resource's
 * arch_has_sparse_bitmasks flag as an unsigned decimal followed by a
 * newline. Always returns 0.
 */
static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
					struct seq_file *seq, void *v)
{
	struct resctrl_schema *schema = of->kn->parent->priv;
	struct rdt_resource *res = schema->res;

	seq_printf(seq, "%u\n", res->cache.arch_has_sparse_bitmasks);
	return 0;
}

/**
 * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
 * @r: Resource to which domain instance @d belongs.
@@ -1839,6 +1850,13 @@ static struct rftype res_common_files[] = {
		.seq_show	= rdtgroup_size_show,
		.fflags		= RF_CTRL_BASE,
	},
	{
		.name		= "sparse_masks",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_has_sparse_bitmasks_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},

};

Loading