Unverified Commit eb0ce17f authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!15190 v2 fs/resctrl: Adapt to the hardware topology structures of RDT and MPAM

Merge Pull Request from: @ci-robot 
 
PR sync from: Zeng Heng <zengheng4@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/PW4Z7NPOLOCBMGYQ4LPM7UP4GXLYYXSI/ 
Compared with v1:

1. V1's Patch 1 is split: the ARM MPAM part is moved into V2's Patch 2,
   while the resctrl common layer and the x86 RDT part remain unchanged
   from the original code;
2. The commit message of V1's Patch 1 is updated with a new
   description;

Zeng Heng (9):
  fs/resctrl: Adapt to the hardware topology structures of RDT and MPAM
  arm64/mpam: Support MATA monitor feature for MPAM
  arm64/mpam: Add judgment to distinguish MSMON_MBWU_CAPTURE definition
  arm64/mpam: Fix out-of-bound access of mbwu_state array
  arm64/mpam: fix MBA granularity conversion formula
  arm64/mpam: fix bug in percent_to_mbw_max()
  arm64/mpam: Fix out-of-bound access of cfg array
  arm64/mpam: Improve conversion accuracy between percent and
    fixed-point fraction
  arm64/mpam: Add write memory barrier to guarantee monitor results


--
2.25.1
 
https://gitee.com/openeuler/kernel/issues/IAKAKN
https://gitee.com/openeuler/kernel/issues/I8T2RT
https://gitee.com/openeuler/kernel/issues/IAFGJ6 
 
Link: https://gitee.com/openeuler/kernel/pulls/15190

 

Reviewed-by: default avatarJason Zeng <jason.zeng@intel.com>
Reviewed-by: default avatarWang ShaoBo <bobo.shaobowang@huawei.com>
Signed-off-by: default avatarZhang Peng <zhangpeng362@huawei.com>
parents 88b69da4 4f096084
Loading
Loading
Loading
Loading
+52 −0
Original line number Diff line number Diff line
@@ -267,3 +267,55 @@ void __init intel_rdt_mbm_apply_quirk(void)
	mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold;
	mbm_cf = mbm_cf_table[cf_index].cf;
}

/* L3 cache occupancy, exposed to userspace as the "llc_occupancy" file. */
static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

/* Total memory bandwidth, exposed as the "mbm_total_bytes" file. */
static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

/* Local memory bandwidth, exposed as the "mbm_local_bytes" file. */
static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	/* Start from an empty list; only supported events get linked in. */
	INIT_LIST_HEAD(&r->evt_list);

	/*
	 * The event nodes are file-scope statics, so linking them here
	 * needs no allocation and cannot fail.
	 */
	if (resctrl_arch_is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}

/*
 * Arch-side monitoring setup: attach the supported events to the L3
 * resource, and for each MBM event the hardware reports as configurable,
 * flag it and initialize the matching "*_config" resctrl file type.
 *
 * Called from the common resctrl_mon_resource_init(); always returns 0.
 */
int resctrl_arch_mon_resource_init(void)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);

	l3_mon_evt_init(r);

	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
		mbm_total_event.configurable = true;
		mbm_config_rftype_init("mbm_total_bytes_config");
	}
	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
		mbm_local_event.configurable = true;
		mbm_config_rftype_init("mbm_local_bytes_config");
	}

	return 0;
}
+34 −6
Original line number Diff line number Diff line
@@ -915,6 +915,12 @@ static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
		return GENMASK_ULL(30, 0);
}

/*
 * CPU models whose MBWU monitor value field follows the older DDI0598
 * definition (a bandwidth flow rate rather than a cumulative byte
 * count); __ris_msmon_read() skips overflow accounting for these.
 */
static const struct midr_range mbwu_flowrate_list[] = {
	MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
	MIDR_ALL_VERSIONS(MIDR_HISI_LINXICORE9100),
	{ /* sentinel */ }
};

static void __ris_msmon_read(void *arg)
{
	bool nrdy = false;
@@ -936,6 +942,9 @@ static void __ris_msmon_read(void *arg)
		  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
	mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);

	/* Selects a monitor instance to configure PARTID. */
	wmb();

	if (m->type == mpam_feat_msmon_mbwu) {
		mbwu_state = &ris->mbwu_state[ctx->mon];
		if (mbwu_state) {
@@ -956,6 +965,12 @@ static void __ris_msmon_read(void *arg)
	if (config_mismatch || reset_on_next_read)
		write_msmon_ctl_flt_vals(m, ctl_val, flt_val);

	/*
	 * Selects the monitor instance associated to the specified PARTID
	 * to read counter value.
	 */
	wmb();

	switch (m->type) {
	case mpam_feat_msmon_csu:
		now = mpam_read_monsel_reg(msc, CSU);
@@ -986,6 +1001,18 @@ static void __ris_msmon_read(void *arg)
		if (!mbwu_state)
			break;

		/*
		 * Following the definition of the DDI0598 version,
		 * the value field of MPAM Memory Bandwidth Usage Monitor Register
		 * indicates the memory bandwidth usage in bytes per second,
		 * instead the scaled count of bytes transferred since the monitor
		 * was last reset in the latest version (DDI0598D_b).
		 */
		if (ris->comp->class->type == MPAM_CLASS_MEMORY) {
			if (is_midr_in_range_list(read_cpuid_id(), mbwu_flowrate_list))
				break;
		}

		/* Add any pre-overflow value to the mbwu_state->val */
		if (mbwu_state->prev_val > now)
			overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
@@ -1229,7 +1256,7 @@ struct reprogram_ris {
/* Call with MSC lock held */
static int mpam_reprogram_ris(void *_arg)
{
	u16 partid, partid_max;
	u16 partid, num_partid;
	struct reprogram_ris *arg = _arg;
	struct mpam_msc_ris *ris = arg->ris;
	struct mpam_config *cfg = arg->cfg;
@@ -1238,9 +1265,9 @@ static int mpam_reprogram_ris(void *_arg)
		return 0;

	spin_lock(&partid_max_lock);
	partid_max = mpam_partid_max;
	num_partid = resctrl_arch_get_num_closid(NULL);
	spin_unlock(&partid_max_lock);
	for (partid = 0; partid < partid_max; partid++)
	for (partid = 0; partid < num_partid; partid++)
		mpam_reprogram_ris_partid(ris, partid, cfg);

	return 0;
@@ -1396,7 +1423,7 @@ static void mpam_reprogram_msc(struct mpam_msc *msc)
		}

		reset = true;
		for (partid = 0; partid < mpam_partid_max; partid++) {
		for (partid = 0; partid < resctrl_arch_get_num_closid(NULL); partid++) {
			cfg = &ris->comp->cfg[partid];
			if (cfg->features)
				reset = false;
@@ -2099,7 +2126,8 @@ static int __allocate_component_cfg(struct mpam_component *comp)
	if (comp->cfg)
		return 0;

	comp->cfg = kcalloc(mpam_partid_max, sizeof(*comp->cfg), GFP_KERNEL);
	comp->cfg = kcalloc(resctrl_arch_get_num_closid(NULL),
			    sizeof(*comp->cfg), GFP_KERNEL);
	if (!comp->cfg)
		return -ENOMEM;

@@ -2211,7 +2239,7 @@ void mpam_reset_class(struct mpam_class *class)

	idx = srcu_read_lock(&mpam_srcu);
	list_for_each_entry_rcu(comp, &class->components, class_list) {
		memset(comp->cfg, 0, (mpam_partid_max * sizeof(*comp->cfg)));
		memset(comp->cfg, 0, resctrl_arch_get_num_closid(NULL) * sizeof(*comp->cfg));

		list_for_each_entry_rcu(ris, &comp->ris, comp_list) {
			mutex_lock(&ris->msc->lock);
+103 −26
Original line number Diff line number Diff line
@@ -336,8 +336,10 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
{
	int err;
	u64 cdp_val;
	u16 num_mbwu_mon;
	struct mon_cfg cfg;
	struct mpam_resctrl_dom *dom;
	struct mpam_resctrl_res *res;
	u32 mon = *(u32 *)arch_mon_ctx;
	enum mpam_device_features type;

@@ -358,8 +360,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
	}

	cfg.mon = mon;
	if (cfg.mon == USE_RMID_IDX)
		cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
	if (cfg.mon == USE_RMID_IDX) {
		/*
		 * The number of mbwu monitors can't support free run mode,
		 * adapt the remainder of rmid to the num_mbwu_mon as a
		 * compromise.
		 */
		res = container_of(r, struct mpam_resctrl_res, resctrl_res);
		num_mbwu_mon = res->class->props.num_mbwu_mon;
		cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid) % num_mbwu_mon;
	}

	cfg.match_pmg = true;
	cfg.pmg = rmid;
@@ -386,13 +396,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
			     u32 closid, u32 rmid, enum resctrl_event_id eventid)
{
	u16 num_mbwu_mon;
	struct mon_cfg cfg;
	struct mpam_resctrl_dom *dom;
	struct mpam_resctrl_res *res;

	if (eventid != QOS_L3_MBM_LOCAL_EVENT_ID)
		return;

	cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid);
	res = container_of(r, struct mpam_resctrl_res, resctrl_res);
	num_mbwu_mon = res->class->props.num_mbwu_mon;
	cfg.mon = resctrl_arch_rmid_idx_encode(closid, rmid) % num_mbwu_mon;
	cfg.match_pmg = true;
	cfg.pmg = rmid;

@@ -473,14 +487,6 @@ static bool class_has_usable_mbwu(struct mpam_class *class)
	if (!mpam_has_feature(mpam_feat_msmon_mbwu, cprops))
		return false;

	/*
	 * resctrl expects the bandwidth counters to be free running,
	 * which means we need as many monitors as resctrl has
	 * control/monitor groups.
	 */
	if (cprops->num_mbwu_mon < resctrl_arch_system_num_rmid_idx())
		return false;

	return (mpam_partid_max > 1) || (mpam_pmg_max != 0);
}

@@ -513,7 +519,7 @@ static u32 get_mba_granularity(struct mpam_props *cprops)
		 * bwa_wd is the number of bits implemented in the 0.xxx
		 * fixed point fraction. 1 bit is 50%, 2 is 25% etc.
		 */
		return MAX_MBA_BW / (cprops->bwa_wd + 1);
		return MAX_MBA_BW / (1 << cprops->bwa_wd);
	}

	return 0;
@@ -530,21 +536,31 @@ static u32 mbw_pbm_to_percent(unsigned long mbw_pbm, struct mpam_props *cprops)
	return result;
}

static u32 mbw_max_to_percent(u16 mbw_max, struct mpam_props *cprops)
static int get_wd_precision(u8 wd)
{
	int ret = (1 << wd) / MAX_MBA_BW;

	if (!ret)
		return 1;

	return ret;
}

static u32 mbw_max_to_percent(u16 mbw_max, u8 wd)
{
	u8 bit;
	u32 divisor = 2, value = 0;
	u32 divisor = 2, value = 0, precision = get_wd_precision(wd);

	for (bit = 15; bit; bit--) {
		if (mbw_max & BIT(bit))
			value += MAX_MBA_BW / divisor;
			value += MAX_MBA_BW * precision / divisor;
		divisor <<= 1;
	}

	return value;
	return DIV_ROUND_UP(value, precision);
}

static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops)
static u32 percent_to_mbw_pbm(u32 pc, struct mpam_props *cprops)
{
	u32 granularity = get_mba_granularity(cprops);
	u8 num_bits = pc / granularity;
@@ -556,26 +572,28 @@ static u32 percent_to_mbw_pbm(u8 pc, struct mpam_props *cprops)
	return (1 << num_bits) - 1;
}

static u16 percent_to_mbw_max(u8 pc, struct mpam_props *cprops)
static u16 percent_to_mbw_max(u32 pc, u8 wd)
{
	u8 bit;
	u32 divisor = 2, value = 0;
	u32 divisor = 2, value = 0, precision = get_wd_precision(wd);

	if (WARN_ON_ONCE(cprops->bwa_wd > 15))
	if (WARN_ON_ONCE(wd > 15))
		return MAX_MBA_BW;

	pc *= precision;

	for (bit = 15; bit; bit--) {
		if (pc >= MAX_MBA_BW / divisor) {
			pc -= MAX_MBA_BW / divisor;
		if (pc >= MAX_MBA_BW * precision / divisor) {
			pc -= MAX_MBA_BW * precision / divisor;
			value |= BIT(bit);
		}
		divisor <<= 1;

		if (!pc || !(MAX_MBA_BW / divisor))
		if (!pc || !(MAX_MBA_BW * precision / divisor))
			break;
	}

	value &= GENMASK(15, 15 - cprops->bwa_wd);
	value &= GENMASK(15, 15 - wd + 1);

	return value;
}
@@ -941,7 +959,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
		/* TODO: Scaling is not yet supported */
		return mbw_pbm_to_percent(cfg->mbw_pbm, cprops);
	case mpam_feat_mbw_max:
		return mbw_max_to_percent(cfg->mbw_max, cprops);
		return mbw_max_to_percent(cfg->mbw_max, cprops->bwa_wd);
	default:
		return -EINVAL;
	}
@@ -983,7 +1001,7 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
			mpam_set_feature(mpam_feat_mbw_part, &cfg);
			break;
		} else if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops->bwa_wd);
			mpam_set_feature(mpam_feat_mbw_max, &cfg);
			break;
		}
@@ -1199,6 +1217,65 @@ int mpam_resctrl_offline_cpu(unsigned int cpu)
	return 0;
}

/* L3 cache occupancy, exposed to userspace as the "llc_occupancy" file. */
static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

/* Total memory bandwidth, exposed as the "mbm_total_bytes" file. */
static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

/* Local memory bandwidth, exposed as the "mbm_local_bytes" file. */
static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	/* Start from an empty list; only supported events get linked in. */
	INIT_LIST_HEAD(&r->evt_list);

	/* A resource without monitoring support keeps an empty list. */
	if (!r->mon_capable)
		return;

	/* L3 hosts the occupancy and local-bandwidth events... */
	if (r->rid == RDT_RESOURCE_L3) {
		if (resctrl_arch_is_llc_occupancy_enabled())
			list_add_tail(&llc_occupancy_event.list, &r->evt_list);

		if (resctrl_arch_is_mbm_local_enabled())
			list_add_tail(&mbm_local_event.list, &r->evt_list);
	}

	/*
	 * ...while, unlike the x86 RDT variant, the total-bandwidth event
	 * is attached to the MBA resource here.
	 */
	if ((r->rid == RDT_RESOURCE_MBA) &&
	     resctrl_arch_is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
}

/*
 * Arch-side monitoring setup for MPAM: populate the event lists of both
 * the L3 and MBA resources, and for each MBM event the hardware reports
 * as configurable, flag it and initialize the matching "*_config"
 * resctrl file type.
 *
 * Called from the common resctrl_mon_resource_init(); always returns 0.
 */
int resctrl_arch_mon_resource_init(void)
{
	l3_mon_evt_init(resctrl_arch_get_resource(RDT_RESOURCE_L3));
	l3_mon_evt_init(resctrl_arch_get_resource(RDT_RESOURCE_MBA));

	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
		mbm_total_event.configurable = true;
		mbm_config_rftype_init("mbm_total_bytes_config");
	}
	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
		mbm_local_event.configurable = true;
		mbm_config_rftype_init("mbm_local_bytes_config");
	}

	return 0;
}

static int __init __cacheinfo_ready(void)
{
	cacheinfo_ready = true;
+0 −15
Original line number Diff line number Diff line
@@ -66,20 +66,6 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
	return container_of(kfc, struct rdt_fs_context, kfc);
}

/**
 * struct mon_evt - Entry in the event list of a resource
 * @evtid:		event id
 * @name:		name of the event
 * @configurable:	true if the event is configurable
 * @list:		entry in &rdt_resource->evt_list
 */
struct mon_evt {
	enum resctrl_event_id	evtid;
	char			*name;
	bool			configurable;
	struct list_head	list;
};

/**
 * union mon_data_bits - Monitoring details for each event file
 * @priv:              Used to store monitoring event data in @u
@@ -306,7 +292,6 @@ void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms,
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
void mbm_config_rftype_init(const char *config);
void rdt_staged_configs_clear(void);
int resctrl_find_cleanest_closid(void);

+1 −46
Original line number Diff line number Diff line
@@ -791,40 +791,6 @@ static void dom_data_exit(struct rdt_resource *r)
	mutex_unlock(&rdtgroup_mutex);
}

static struct mon_evt llc_occupancy_event = {
	.name		= "llc_occupancy",
	.evtid		= QOS_L3_OCCUP_EVENT_ID,
};

static struct mon_evt mbm_total_event = {
	.name		= "mbm_total_bytes",
	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
};

static struct mon_evt mbm_local_event = {
	.name		= "mbm_local_bytes",
	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
};

/*
 * Initialize the event list for the resource.
 *
 * Note that MBM events are also part of RDT_RESOURCE_L3 resource
 * because as per the SDM the total and local memory bandwidth
 * are enumerated as part of L3 monitoring.
 */
static void l3_mon_evt_init(struct rdt_resource *r)
{
	INIT_LIST_HEAD(&r->evt_list);

	if (resctrl_arch_is_llc_occupancy_enabled())
		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_total_enabled())
		list_add_tail(&mbm_total_event.list, &r->evt_list);
	if (resctrl_arch_is_mbm_local_enabled())
		list_add_tail(&mbm_local_event.list, &r->evt_list);
}

int resctrl_mon_resource_init(void)
{
	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
@@ -837,18 +803,7 @@ int resctrl_mon_resource_init(void)
	if (ret)
		return ret;

	l3_mon_evt_init(r);

	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
		mbm_total_event.configurable = true;
		mbm_config_rftype_init("mbm_total_bytes_config");
	}
	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
		mbm_local_event.configurable = true;
		mbm_config_rftype_init("mbm_local_bytes_config");
	}

	return 0;
	return resctrl_arch_mon_resource_init();
}

void resctrl_mon_resource_exit(void)
Loading